In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import mpl_toolkits as mplot3d
import scipy as sp
pd.set_option("display.max_columns", None)
from tqdm import tqdm
tqdm.pandas()
In [2]:
def lighten_color(color, amount=0.5):
"""
Lightens the given color by multiplying (1-luminosity) by the given amount.
Input can be matplotlib color string, hex string, or RGB tuple.
Examples:
>> lighten_color('g', 0.3)
>> lighten_color('#F034A3', 0.6)
>> lighten_color((.3,.55,.1), 0.5)
"""
import matplotlib.colors as mc
import colorsys
try:
c = mc.cnames[color]
except:
c = color
c = colorsys.rgb_to_hls(*mc.to_rgb(c))
return colorsys.hls_to_rgb(c[0], 1 - amount * (1 - c[1]), c[2])
DATA CLEANING AND WRANGLING¶
In [3]:
# DF1 = Total and facet scores, and score per question
df0 = pd.read_csv('Indian Risk Survey_Besample_Filtered_12_2024.csv')
df1 = df0.iloc[:, 19:62].copy()
df1.insert(0, "ResponseId",0)
df1["ResponseId"] = df0["ResponseId"].copy()
df1
Out[3]:
| ResponseId | Q0 | Q1 | Q2 | Q3 | Q4 | Q5_1 | Q6_1 | Q8_1 | Q8_2 | Q8_3 | Q8_4 | Q8_5 | Q8_6 | Q8_7 | Q8_8 | Q9b_1 | Q9b_4 | Q9b_5 | Q9b_6 | Q9b_7 | Q9b_8 | Q9b_9 | Q11a_1 | Q11a_2 | Q11a_3 | Q11b | Q11c | Q11d | Q11h | Attention Check | Q11i | Q12a | Q12b | Q12c | Q13a | Q13b | Q13c | Q13d | Q14a | Q14b | Q14c | Q15a | Q14b.1 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R_4RadSQzbD3kAfEP | Yes, I would like to participate in the study ... | Male | 18 | Secondary School (11th to 12th Std.) | Student | 4 | 7 | 3 | 3 | 3 | 4 | 5 | 4 | 3 | 4 | 25.0 | 15.0 | 10 | 10 | 10.0 | 10 | 20.0 | 2 | 2 | 2 | HIV test,Professional horoscope | Irrespective of the quality of the study, futu... | The growth rate over five years will be betwee... | 5 in 100 people prior to the intervention to 6... | Vase | The medication increases recovery by 100% | More than $102 | Less than today with the money in this account | False | 500 | 10 | 0.10 | 9 out of 10 | 25.0 | 20 | They are equal | INR 1500,001 – INR 30,00,000 | 10000000.0 |
| 1 | R_4F8RqnI7xnwXmBY | Yes, I would like to participate in the study ... | Male | 41 | Undergraduate Program | Salaried/Employee/Consultant in a sector other... | 6 | 9 | 4 | 4 | 4 | 4 | 4 | 5 | 4 | 5 | 10.0 | 30.0 | 0 | 0 | 20.0 | 40 | 0.0 | 2 | 1 | 1 | HIV test,Fingerprint,DNA test | Irrespective of the quality of the study, futu... | It is not possible to predict the growth rate ... | 100 in 10,000 people prior to the intervention... | Vase | The medication increases recovery by 100% | More than $102 | Less than today with the money in this account | False | 500 | 10 | 0.10 | 59 out of 1000 | 25.0 | 20 | Can’t say | INR 500,001 – INR 15,00,000 | 5000000.0 |
| 2 | R_8taN3wUPbeZMxzM | Yes, I would like to participate in the study ... | Male | 20 | Undergraduate Program | Student | 3 | 0 (no investment experience) | 6 (strongly agree) | 6 (strongly agree) | 4 | 1 (strongly disagree) | 1 (strongly disagree) | 2 | 1 (strongly disagree) | 6 (strongly agree) | 30.0 | 40.0 | 10 | 10 | 0.0 | 5 | 5.0 | 1 | 1 | 1 | Fingerprint | The higher the quality of the study, the more ... | It is not possible to predict the growth rate ... | 70 in 100 people prior to the intervention to ... | Vase | The medication increases recovery by 50% | More than $102 | More than today with the money in this account | Do not know | 1000 | 10 | 50.00 | 9 out of 59 | 50.0 | 20 | They are equal | < INR 500,000 | 10000.0 |
| 3 | R_4KT4eUqEbo2q5s9 | Yes, I would like to participate in the study ... | Male | 18 | Secondary School (11th to 12th Std.) | Student | 5 | 0 (no investment experience) | 6 (strongly agree) | 6 (strongly agree) | 4 | 4 | 1 (strongly disagree) | 6 (strongly agree) | 4 | 6 (strongly agree) | 0.0 | 0.0 | 0 | 20 | 40.0 | 40 | 0.0 | 1 | 1 | 2 | Fingerprint,DNA test | The higher the quality of the study, the more ... | The growth rate over five years will be exactl... | 100 in 10,000 people prior to the intervention... | Vase | The medication increases recovery by 50% | More than $102 | Less than today with the money in this account | False | 500 | 10 | 0.10 | 59 out of 1000 | 25.0 | 20 | They are equal | < INR 500,000 | 1000000.0 |
| 4 | R_4TMr0yMiNpsJBr5 | Yes, I would like to participate in the study ... | Male | 37 | Undergraduate Program | Salaried/Employee/Consultant in a sector other... | 3 | 1 | 4 | 5 | 5 | 6 (strongly agree) | 6 (strongly agree) | 6 (strongly agree) | 6 (strongly agree) | 2 | 70.0 | 10.0 | 0 | 0 | 0.0 | 10 | 10.0 | 2 | 1 | 1 | HIV test,Fingerprint,DNA test,Cancer screening... | The higher the quality of the study, the more ... | The growth rate over five years will be exactl... | 100 in 10,000 people prior to the intervention... | Vase | The medication increases recovery by 100% | More than $102 | Less than today with the money in this account | False | 500 | 10 | 0.10 | 59 out of 1000 | 25.0 | 20 | They are equal | < INR 500,000 | 1000000.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 365 | R_47eRbAoGWc5Ttzb | Yes, I would like to participate in the study ... | Male | 48 | Post-Graduate Program | Salaried/Employee/Consultant in a sector other... | 0 (unwilling to take risk) | 1 | 4 | 5 | 4 | 5 | 5 | 2 | 3 | 6 (strongly agree) | 50.0 | 10.0 | 10 | 20 | 2.0 | 5 | 3.0 | 1 | 1 | 1 | Professional horoscope | Irrespective of the quality of the study, futu... | The growth rate over five years will be exactl... | 70 in 100 people prior to the intervention to ... | Vase | The medication increases recovery by 2% | Refuse to answer | Exactly the same as today with the money in th... | False | 500 | 68 | 0.01 | 59 out of 100 | 50.0 | 40 | They are equal | < INR 500,000 | 50000.0 |
| 366 | R_4eWLoP4wkX5K1kl | Yes, I would like to participate in the study ... | Male | 22 | Undergraduate Program | Salaried/Employee/Consultant in a sector other... | 5 | 6 | 5 | 5 | 4 | 5 | 5 | 6 (strongly agree) | 5 | 4 | 70.0 | 15.0 | 0 | 0 | 15.0 | 0 | 0.0 | 2 | 1 | 2 | Cancer screening test | The higher the quality of the study, the more ... | The growth rate over five years will be exactl... | It is not possible to determine which of the a... | Vase | The medication increases recovery by 2% | More than $102 | More than today with the money in this account | False | 500 | 50 | 0.10 | 9 out of 59 | 25.0 | 20 | They are equal | < INR 500,000 | 800000.0 |
| 367 | R_47rbcPZByLBrZFn | Yes, I would like to participate in the study ... | Male | 19 | Diploma and Vocational Training | Student | 5 | 5 | 6 (strongly agree) | 6 (strongly agree) | 6 (strongly agree) | 6 (strongly agree) | 6 (strongly agree) | 5 | 6 (strongly agree) | 5 | 10.0 | 20.0 | 10 | 15 | 25.0 | 15 | 5.0 | 1 | 2 | 1 | DNA test | Irrespective of the quality of the study, futu... | It is not possible to predict the growth rate ... | It is not possible to determine which of the a... | Vase | The medication increases recovery by 2% | More than $102 | Less than today with the money in this account | False | 345 | 1 | 0.10 | 9 out of 10 | 90.0 | 20 | They are equal | INR 500,001 – INR 15,00,000 | 2500000.0 |
| 368 | R_42y9IJJWALsuHPF | Yes, I would like to participate in the study ... | Male | 28 | Undergraduate Program | Salaried/Employee/Consultant in a sector other... | 6 | 7 | 6 (strongly agree) | 3 | 5 | 5 | 6 (strongly agree) | 6 (strongly agree) | 6 (strongly agree) | 5 | 4.0 | 0.0 | 0 | 0 | 6.0 | 40 | 50.0 | 2 | 1 | 1 | HIV test,Fingerprint,DNA test,Cancer screening... | The higher the quality of the study, the more ... | The growth rate over five years will be betwee... | 70 in 100 people prior to the intervention to ... | Vase | The medication increases recovery by 100% | More than $102 | Less than today with the money in this account | False | 600 | 10 | 0.10 | 9 out of 10 | 25.0 | 5 | Hertinol | INR 1500,001 – INR 30,00,000 | 4500000.0 |
| 369 | R_8k6D0jzzHCC5X3Z | Yes, I would like to participate in the study ... | Male | 25 | Undergraduate Program | Salaried/Employee/Consultant in a sector other... | 7 (willing to take risk) | 8 | 6 (strongly agree) | 5 | 5 | 4 | 6 (strongly agree) | 6 (strongly agree) | 6 (strongly agree) | 5 | 5.0 | 5.0 | 5 | 5 | 30.0 | 20 | 30.0 | 2 | 1 | 1 | DNA test | The higher the quality of the study, the more ... | It is not possible to predict the growth rate ... | 5 in 100 people prior to the intervention to 6... | Vase | The medication increases recovery by 100% | More than $102 | Less than today with the money in this account | False | 500 | 500 | 50.00 | 59 out of 1000 | 25.0 | 10 | Hertinol | > INR 75,00,000 | 8000000.0 |
370 rows × 44 columns
Demographics¶
In [4]:
df1["age"] = df1["Q2"]
# Age groups/rec = [2,3,4]
# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 59 y/o
# Age group 4 = 60 to 75 y/o (75 y/o, i.e, within the scope of the data we have, it can mean 60 and above also)
df1["age_rec"] = np.where( (df1["age"] >= 16) & (df1["age"] <= 35), 2,
np.where( (df1["age"] >= 36) & (df1["age"] <= 59), 3,
np.where( (df1["age"] >= 60), 4, 0
)
)
)
# Secondary upto 10 – ISCED 3
# Senior Secondary upto 12 - ISCED 3
# Diploma and voca – ISCED 2
# UG – ISCED 2
# PG – ISCED 1
# PhD and higher – ISCED 1
df1["isced"] = np.where(df1["Q3"] == "Post-Graduate Program", 1,
np.where(df1["Q3"] == "Undergraduate Program", 2,
np.where(df1["Q3"] == "Ph.D. and higher", 1,
np.where(df1["Q3"] == "Diploma and Vocational Training", 2,
np.where(df1["Q3"] == "Secondary School (11th to 12th Std.)", 3,
np.where( df1["Q3"] == "Primary School (up to 10th Std.)", 3,
np.where( df1["Q3"] == "M.Phil.", 1, 0
)
)
)
)
)
)
)
df1["income"] = np.where(df1["Q15a"] == "< INR 500,000", 1,
np.where(df1["Q15a"] == "INR 500,001 – INR 15,00,000", 2,
np.where(df1["Q15a"] == "INR 1500,001 – INR 30,00,000", 3,
np.where(df1["Q15a"] == "INR 30,00,001 – INR 50,00,000", 4,
np.where(df1["Q15a"] == "INR 50,00,001 – INR 75,00,000", 5,
np.where( df1["Q15a"] == "> INR 75,00,000", 6,0
)
)
)
)
)
)
df1["wealth"] = df1["Q14b.1"]
In [ ]:
In [ ]:
In [ ]:
In [ ]:
Questions¶
In [5]:
matchCol = ["q8_2_1", "q8_2_2", "q8_2_3", "q8_2_4", "q8_2_5", "q8_3", "q8_4", "q8_5berlin_1",
"q8_5london_1", "q8_5paris_1", "q8_6", "q8_7", "q9_1_1", "q9_2_1", "q9_3",
"q10_1_1", "q10_2_1", "q10_3_1", "q10_4"]
df1[ matchCol ] = 0
df1.columns
Out[5]:
Index(['ResponseId', 'Q0', 'Q1', 'Q2', 'Q3', 'Q4', 'Q5_1', 'Q6_1', 'Q8_1',
'Q8_2', 'Q8_3', 'Q8_4', 'Q8_5', 'Q8_6', 'Q8_7', 'Q8_8', 'Q9b_1',
'Q9b_4', 'Q9b_5', 'Q9b_6', 'Q9b_7', 'Q9b_8', 'Q9b_9', 'Q11a_1',
'Q11a_2', 'Q11a_3', 'Q11b', 'Q11c', 'Q11d', 'Q11h', 'Attention Check',
'Q11i', 'Q12a', 'Q12b', 'Q12c', 'Q13a', 'Q13b', 'Q13c', 'Q13d', 'Q14a',
'Q14b', 'Q14c', 'Q15a', 'Q14b.1', 'age', 'age_rec', 'isced', 'income',
'wealth', 'q8_2_1', 'q8_2_2', 'q8_2_3', 'q8_2_4', 'q8_2_5', 'q8_3',
'q8_4', 'q8_5berlin_1', 'q8_5london_1', 'q8_5paris_1', 'q8_6', 'q8_7',
'q9_1_1', 'q9_2_1', 'q9_3', 'q10_1_1', 'q10_2_1', 'q10_3_1', 'q10_4'],
dtype='object')
In [6]:
df1["q8_2_1"] = np.where(df1["Q11b"].str.contains("HIV test"), 1, 2)
df1["q8_2_2"] = np.where(df1["Q11b"].str.contains("Fingerprint"), 1, 2)
df1["q8_2_3"] = np.where(df1["Q11b"].str.contains("DNA test"), 1, 2)
df1["q8_2_4"] = np.where(df1["Q11b"].str.contains("Cancer screening test"), 1, 2)
df1["q8_2_5"] = np.where(df1["Q11b"].str.contains("Professional horoscope"), 1, 2)
df1["q8_3"] = np.where(df1["Q11c"] == "The lower the quality of the study, the more likely that future studies will change the risk estimate.", 1,
np.where(df1["Q11c"] == "The higher the quality of the study, the more likely that future studies will change the risk estimate.", 2,
np.where(df1["Q11c"] == "Irrespective of the quality of the study, future studies will not change the risk estimate.", 3,
np.where(df1["Q11c"] == "Irrespective of the quality of the study, future studies will change the risk estimate substantially anyway.", 4,0
)
)
)
)
df1["q8_4"] = np.where(df1["Q11d"] == "The growth rate will be 0.4% on average each year", 1,
np.where(df1["Q11d"] == "The growth rate over five years will be exactly 2%", 2,
np.where(df1["Q11d"] == "The growth rate over five years will be between 1% and 3%", 3,
np.where(df1["Q11d"] == "It is not possible to predict the growth rate with certainty", 4,0
)
)
)
)
In [7]:
df1["q8_5berlin_1"] = np.where(df1["Q11a_1"] == 1, 1, 2)
df1["q8_5london_1"] = np.where(df1["Q11a_1"] == 1, 1, 2)
df1["q8_5paris_1"] = np.where(df1["Q11a_1"] == 1, 1, 2)
df1["q8_6"] = np.where(df1["Q11i"] == "The medication increases recovery by 100%", 1,
np.where(df1["Q11i"] == "The medication increases recovery by 50%", 2,
np.where(df1["Q11i"] == "The medication increases recovery by 2%", 3,
np.where(df1["Q11i"] == "None of the above is implied", 4, 0
)
)
)
)
df1["q8_7"] = np.where(df1["Q11h"] == "5 in 100 people prior to the intervention to 6 out of 100 people after the intervention", 1,
np.where(df1["Q11h"] == "100 in 10,000 people prior to the intervention to 120 out of 10,000 people after the intervention", 2,
np.where(df1["Q11h"] == "70 in 100 people prior to the intervention to 90 out of 100 people after the intervention", 3,
np.where(df1["Q11h"] == "It is not possible to determine which of the answers is correct given the information provided", 4, 0
)
)
)
)
df1["q9_1_1"] = df1["Q14a"].copy()
df1["q9_2_1"] = df1["Q14b"].copy()
df1["q9_3"] = np.where(df1["Q14c"] == "Crosicol", 1,
np.where(df1["Q14c"] == "Hertinol", 2,
np.where(df1["Q14c"] == "They are equal", 3,
np.where(df1["Q14c"] == "Can’t say", 4, 0
)
)
)
)
In [8]:
df1["q10_1_1"] = df1["Q13a"].copy()
df1["q10_2_1"] = df1["Q13b"].copy()
df1["q10_3_1"] = df1["Q13c"].copy()
df1["q10_4"] = np.where(df1["Q13d"] == "9 out of 59", 1,
np.where(df1["Q13d"] == "9 out of 10", 2,
np.where(df1["Q13d"] == "59 out of 1000", 3,
np.where(df1["Q13d"] == "59 out of 100", 4, 0
)
)
)
)
In [9]:
df1["ResponseId"].nunique()
Out[9]:
370
In [10]:
scoreColumns = ["certainty1", "certainty2", "certainty3", "certainty4", "certainty5", "uncertainty1", "uncertainty2", "numeracy1", "numeracy2", "numeracy3", "numeracy4", "numeracy5", "graph1", "graph2", "graph3", "riskcalculation1", "riskcalculation2", "riskcalculation3", "riskcalculation4"]
df1[scoreColumns] = 0
In [11]:
# Assigning scores
def scoring1(df1Facet):
df1Facet.loc[ df1Facet["q8_2_1"] == 2, "certainty1"] = 1
df1Facet.loc[ df1Facet["q8_2_2"] == 2, "certainty2"] = 1
df1Facet.loc[ df1Facet["q8_2_3"] == 2, "certainty3"] = 1
df1Facet.loc[ df1Facet["q8_2_4"] == 2, "certainty4"] = 1
df1Facet.loc[ df1Facet["q8_2_5"] == 2, "certainty5"] = 1
df1Facet.loc[ df1Facet["q8_3"] == 1, "uncertainty1"] = 1
df1Facet.loc[ df1Facet["q8_4"] == 4, "uncertainty2"] = 1
df1Facet.loc[ df1Facet["q8_5berlin_1"] == 2, "numeracy1"] = 1
df1Facet.loc[ df1Facet["q8_5london_1"] == 1, "numeracy2"] = 1
df1Facet.loc[ df1Facet["q8_5paris_1"] == 1, "numeracy3"] = 1
df1Facet.loc[ df1Facet["q8_6"] == 1, "numeracy4"] = 1
df1Facet.loc[ df1Facet["q8_7"] == 4, "numeracy5"] = 1
df1Facet.loc[ df1Facet["q9_1_1"] == 25, "graph1"] = 1
df1Facet.loc[ df1Facet["q9_2_1"] == 20, "graph2"] = 1
df1Facet.loc[ df1Facet["q9_3"] == 3, "graph3"] = 1
df1Facet.loc[ df1Facet["q10_1_1"] == 500, "riskcalculation1"] = 1
df1Facet.loc[ df1Facet["q10_2_1"] == 10, "riskcalculation2"] = 1
df1Facet.loc[ (df1Facet["q10_3_1"] == 0.1) | (df1Facet["q10_3_1"] == ".1") | (df1Facet["q10_3_1"] == ",1") , "riskcalculation3"] = 1
df1Facet.loc[ df1Facet["q10_4"] == 1, "riskcalculation4"] = 1
# Assigning total scores
#df1Facet["Certainty score_5"] = df1Facet["certainty1"] + df1Facet["certainty2"] + df1Facet["certainty3"] + df1Facet["certainty4"] + df1Facet["certainty5"]
#df1Facet["Uncertainty score_5"] = df1Facet["uncertainty1"] + df1Facet["uncertainty2"]
#df1Facet["Number Comprehension score_5"] = df1Facet["numeracy1"] + df1Facet["numeracy2"] + df1Facet["numeracy3"] + df1Facet["numeracy4"] + df1Facet["numeracy5"]
#df1Facet["Graph Comprehension score_5"] = df1Facet["graph1"] + df1Facet["graph2"] + df1Facet["graph3"]
#df1Facet["Calculation score_4"] = df1Facet["riskcalculation1"] + df1Facet["riskcalculation2"] + df1Facet["riskcalculation3"] + df1Facet["riskcalculation4"]
#df1Facet["Total Score_19"] = df1Facet["Certainty score_5"] + df1Facet["Uncertainty score_5"] + df1Facet["Number Comprehension score_5"] + df1Facet["Graph Comprehension score_5"] + df1Facet["Calculation score_4"]
return df1Facet
In [12]:
df1Facet = df1.groupby(["ResponseId"]).progress_apply(scoring1)
100%|███████████████████████████████████████████████████████████████████████████████| 370/370 [00:02<00:00, 172.64it/s]
In [13]:
df1Facet
Out[13]:
| ResponseId | Q0 | Q1 | Q2 | Q3 | Q4 | Q5_1 | Q6_1 | Q8_1 | Q8_2 | Q8_3 | Q8_4 | Q8_5 | Q8_6 | Q8_7 | Q8_8 | Q9b_1 | Q9b_4 | Q9b_5 | Q9b_6 | Q9b_7 | Q9b_8 | Q9b_9 | Q11a_1 | Q11a_2 | Q11a_3 | Q11b | Q11c | Q11d | Q11h | Attention Check | Q11i | Q12a | Q12b | Q12c | Q13a | Q13b | Q13c | Q13d | Q14a | Q14b | Q14c | Q15a | Q14b.1 | age | age_rec | isced | income | wealth | q8_2_1 | q8_2_2 | q8_2_3 | q8_2_4 | q8_2_5 | q8_3 | q8_4 | q8_5berlin_1 | q8_5london_1 | q8_5paris_1 | q8_6 | q8_7 | q9_1_1 | q9_2_1 | q9_3 | q10_1_1 | q10_2_1 | q10_3_1 | q10_4 | certainty1 | certainty2 | certainty3 | certainty4 | certainty5 | uncertainty1 | uncertainty2 | numeracy1 | numeracy2 | numeracy3 | numeracy4 | numeracy5 | graph1 | graph2 | graph3 | riskcalculation1 | riskcalculation2 | riskcalculation3 | riskcalculation4 | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ResponseId | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| R_1YkMM2lMB9aEuVL | 150 | R_1YkMM2lMB9aEuVL | Yes, I would like to participate in the study ... | Female | 41 | Undergraduate Program | Salaried/Employee/Consultant in a sector other... | 3 | 3 | 3 | 4 | 3 | 3 | 3 | 3 | 3 | 4 | 85.0 | 0.0 | 0 | 0 | 5.0 | 10 | 0.0 | 1 | 1 | 1 | DNA test | The higher the quality of the study, the more ... | The growth rate over five years will be betwee... | 5 in 100 people prior to the intervention to 6... | Vase | The medication increases recovery by 50% | Less than $102 | Exactly the same as today with the money in th... | True | 1000 | 10 | 1.000 | 59 out of 1000 | 9.0 | 20 | Crosicol | < INR 500,000 | 25000.0 | 41 | 3 | 2 | 1 | 25000.0 | 2 | 2 | 1 | 2 | 2 | 2 | 3 | 1 | 1 | 1 | 2 | 1 | 9.0 | 20 | 1 | 1000 | 10 | 1.000 | 3 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 |
| R_401xXJ9E3ZgU9gu | 96 | R_401xXJ9E3ZgU9gu | Yes, I would like to participate in the study ... | Male | 33 | Undergraduate Program | Stock analyst | 7 (willing to take risk) | 8 | 5 | 4 | 5 | 5 | 5 | 6 (strongly agree) | 6 (strongly agree) | 6 (strongly agree) | 0.0 | 0.0 | 10 | 0 | 30.0 | 60 | 0.0 | 2 | 1 | 1 | Fingerprint,DNA test | The higher the quality of the study, the more ... | The growth rate over five years will be betwee... | 100 in 10,000 people prior to the intervention... | Vase | The medication increases recovery by 100% | More than $102 | Less than today with the money in this account | False | 500 | 1 | 0.001 | 59 out of 1000 | 25.0 | 20 | They are equal | INR 500,001 – INR 15,00,000 | 16000000.0 | 33 | 2 | 2 | 2 | 16000000.0 | 2 | 1 | 1 | 2 | 2 | 2 | 3 | 2 | 2 | 2 | 1 | 2 | 25.0 | 20 | 3 | 500 | 1 | 0.001 | 3 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 |
| R_40N8bUn3C0jwGzf | 200 | R_40N8bUn3C0jwGzf | Yes, I would like to participate in the study ... | Female | 18 | Undergraduate Program | Student | 0 (unwilling to take risk) | 0 (no investment experience) | 3 | 3 | 4 | 3 | 3 | 3 | 2 | 3 | 40.0 | 20.0 | 10 | 10 | 10.0 | 5 | 5.0 | 2 | 1 | 1 | HIV test,Fingerprint,DNA test,Cancer screening... | The higher the quality of the study, the more ... | The growth rate over five years will be betwee... | It is not possible to determine which of the a... | Vase | The medication increases recovery by 2% | More than $102 | More than today with the money in this account | True | 1000 | 500 | 100.000 | 9 out of 59 | 11.0 | 60 | They are equal | < INR 500,000 | 80000.0 | 18 | 2 | 2 | 1 | 80000.0 | 1 | 1 | 1 | 1 | 2 | 2 | 3 | 2 | 2 | 2 | 3 | 4 | 11.0 | 60 | 3 | 1000 | 500 | 100.000 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
| R_40VyTmJ6i96wUP4 | 235 | R_40VyTmJ6i96wUP4 | Yes, I would like to participate in the study ... | Female | 34 | Post-Graduate Program | Not employed | 2 | 0 (no investment experience) | 3 | 2 | 2 | 3 | 3 | 3 | 2 | 3 | 40.0 | 25.0 | 10 | 10 | 15.0 | 0 | 0.0 | 1 | 2 | 1 | DNA test | The higher the quality of the study, the more ... | It is not possible to predict the growth rate ... | It is not possible to determine which of the a... | Vase | The medication increases recovery by 2% | Refuse to answer | Less than today with the money in this account | Do not know | 698 | 352 | 1.000 | 59 out of 1000 | 30.0 | 20 | Crosicol | < INR 500,000 | 500000.0 | 34 | 2 | 1 | 1 | 500000.0 | 2 | 2 | 1 | 2 | 2 | 2 | 4 | 1 | 1 | 1 | 3 | 4 | 30.0 | 20 | 1 | 698 | 352 | 1.000 | 3 | 1 | 1 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| R_40ZEg7vX3Y7mMQl | 184 | R_40ZEg7vX3Y7mMQl | Yes, I would like to participate in the study ... | Female | 31 | Undergraduate Program | Not employed | 4 | 6 | 2 | 2 | 2 | 1 (strongly disagree) | 2 | 3 | 4 | 1 (strongly disagree) | 30.0 | 30.0 | 10 | 0 | 20.0 | 10 | 0.0 | 1 | 1 | 1 | HIV test,Fingerprint,DNA test,Cancer screening... | The higher the quality of the study, the more ... | The growth rate will be 0.4% on average each year | 70 in 100 people prior to the intervention to ... | Vase | The medication increases recovery by 2% | More than $102 | Less than today with the money in this account | True | 500 | 10 | 10.000 | 59 out of 1000 | 50.0 | 20 | They are equal | < INR 500,000 | 500000.0 | 31 | 2 | 2 | 1 | 500000.0 | 1 | 1 | 1 | 1 | 2 | 2 | 1 | 1 | 1 | 1 | 3 | 3 | 50.0 | 20 | 3 | 500 | 10 | 10.000 | 3 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| R_4yzFrFYMa7w8nQZ | 238 | R_4yzFrFYMa7w8nQZ | Yes, I would like to participate in the study ... | Female | 18 | Secondary School (11th to 12th Std.) | Student | 0 (unwilling to take risk) | 0 (no investment experience) | 6 (strongly agree) | 5 | 5 | 5 | 4 | 4 | 5 | 6 (strongly agree) | 20.0 | 10.0 | 15 | 25 | 5.0 | 9 | 16.0 | 1 | 2 | 1 | DNA test | The higher the quality of the study, the more ... | The growth rate over five years will be exactl... | 70 in 100 people prior to the intervention to ... | Vase | The medication increases recovery by 100% | More than $102 | Less than today with the money in this account | True | 1000 | 100 | 100.000 | 59 out of 1000 | 25.0 | 20 | Hertinol | < INR 500,000 | 90000.0 | 18 | 2 | 3 | 1 | 90000.0 | 2 | 2 | 1 | 2 | 2 | 2 | 2 | 1 | 1 | 1 | 1 | 3 | 25.0 | 20 | 2 | 1000 | 100 | 100.000 | 3 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 |
| R_4zPiSk23ayGvFD4 | 364 | R_4zPiSk23ayGvFD4 | Yes, I would like to participate in the study ... | Male | 70 | Undergraduate Program | Salaried/Employee/Consultant in a sector other... | 4 | 6 | 4 | 4 | 4 | 4 | 5 | 5 | 5 | 4 | 15.0 | 20.0 | 10 | 0 | 25.0 | 30 | 0.0 | 2 | 1 | 1 | DNA test | The higher the quality of the study, the more ... | It is not possible to predict the growth rate ... | It is not possible to determine which of the a... | Vase | The medication increases recovery by 100% | More than $102 | Less than today with the money in this account | False | 500 | 10 | 1.000 | 59 out of 1000 | 25.0 | 20 | Crosicol | INR 1500,001 – INR 30,00,000 | 7000000.0 | 70 | 4 | 2 | 3 | 7000000.0 | 2 | 2 | 1 | 2 | 2 | 2 | 4 | 2 | 2 | 2 | 1 | 4 | 25.0 | 20 | 1 | 500 | 10 | 1.000 | 3 | 1 | 1 | 0 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 0 |
| R_4zdzjECYeO2A5Ox | 255 | R_4zdzjECYeO2A5Ox | Yes, I would like to participate in the study ... | Female | 19 | Secondary School (11th to 12th Std.) | Student | 4 | 2 | 6 (strongly agree) | 4 | 5 | 5 | 5 | 6 (strongly agree) | 3 | 4 | 40.0 | 50.0 | 0 | 10 | 0.0 | 0 | 0.0 | 2 | 1 | 1 | Fingerprint,DNA test | The lower the quality of the study, the more l... | The growth rate over five years will be exactl... | 70 in 100 people prior to the intervention to ... | Vase | None of the above is implied | More than $102 | Less than today with the money in this account | Do not know | 500 | 10 | 0.100 | 9 out of 59 | 25.0 | 20 | They are equal | < INR 500,000 | 800000.0 | 19 | 2 | 3 | 1 | 800000.0 | 2 | 1 | 1 | 2 | 2 | 1 | 2 | 2 | 2 | 2 | 4 | 3 | 25.0 | 20 | 3 | 500 | 10 | 0.100 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
| R_8k6D0jzzHCC5X3Z | 369 | R_8k6D0jzzHCC5X3Z | Yes, I would like to participate in the study ... | Male | 25 | Undergraduate Program | Salaried/Employee/Consultant in a sector other... | 7 (willing to take risk) | 8 | 6 (strongly agree) | 5 | 5 | 4 | 6 (strongly agree) | 6 (strongly agree) | 6 (strongly agree) | 5 | 5.0 | 5.0 | 5 | 5 | 30.0 | 20 | 30.0 | 2 | 1 | 1 | DNA test | The higher the quality of the study, the more ... | It is not possible to predict the growth rate ... | 5 in 100 people prior to the intervention to 6... | Vase | The medication increases recovery by 100% | More than $102 | Less than today with the money in this account | False | 500 | 500 | 50.000 | 59 out of 1000 | 25.0 | 10 | Hertinol | > INR 75,00,000 | 8000000.0 | 25 | 2 | 2 | 6 | 8000000.0 | 2 | 2 | 1 | 2 | 2 | 2 | 4 | 2 | 2 | 2 | 1 | 1 | 25.0 | 10 | 2 | 500 | 500 | 50.000 | 3 | 1 | 1 | 0 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
| R_8taN3wUPbeZMxzM | 2 | R_8taN3wUPbeZMxzM | Yes, I would like to participate in the study ... | Male | 20 | Undergraduate Program | Student | 3 | 0 (no investment experience) | 6 (strongly agree) | 6 (strongly agree) | 4 | 1 (strongly disagree) | 1 (strongly disagree) | 2 | 1 (strongly disagree) | 6 (strongly agree) | 30.0 | 40.0 | 10 | 10 | 0.0 | 5 | 5.0 | 1 | 1 | 1 | Fingerprint | The higher the quality of the study, the more ... | It is not possible to predict the growth rate ... | 70 in 100 people prior to the intervention to ... | Vase | The medication increases recovery by 50% | More than $102 | More than today with the money in this account | Do not know | 1000 | 10 | 50.000 | 9 out of 59 | 50.0 | 20 | They are equal | < INR 500,000 | 10000.0 | 20 | 2 | 2 | 1 | 10000.0 | 2 | 1 | 2 | 2 | 2 | 2 | 4 | 1 | 1 | 1 | 2 | 3 | 50.0 | 20 | 3 | 1000 | 10 | 50.000 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 1 |
370 rows × 87 columns
In [14]:
df1 = df1Facet.copy()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [15]:
df1["Certainty_3"] = df1["certainty1"] + df1["certainty2"] + df1["certainty3"]
df1["RiskComprehension_4"] = df1["riskcalculation1"] + df1["riskcalculation2"] + df1["riskcalculation3"] + df1["riskcalculation4"]
df1["GraphLiteracy_3"] = df1["graph1"] + df1["graph2"] + df1["graph3"]
df1["Numeracy_3"] = df1["numeracy1"] + df1["numeracy2"] + df1["numeracy3"]
df1["Bayesianreasoning_1"] = df1["numeracy4"]
df1["TotalScore_14"] = df1["Certainty_3"] + df1["RiskComprehension_4"] + df1["GraphLiteracy_3"] + df1["Numeracy_3"] + df1["Bayesianreasoning_1"]
df1["Certainty_%"] = df1["Certainty_3"] / 3 * 100
df1["RiskComprehension_%"] = df1["RiskComprehension_4"] / 4 * 100
df1["GraphLiteracy_%"] = df1["GraphLiteracy_3"] / 3 * 100
df1["Numeracy_%"] = df1["Numeracy_3"] / 3 * 100
df1["Bayesianreasoning_%"] = df1["Bayesianreasoning_1"] / 1 * 100
df1["TotalScore_%"] = df1["TotalScore_14"] / 14 * 100
colReq = ["ResponseId", "age", "age_rec", "isced", "income", "wealth", "Certainty_3", "RiskComprehension_4", "GraphLiteracy_3",
"Numeracy_3", "Bayesianreasoning_1", "Certainty_%", "RiskComprehension_%", "GraphLiteracy_%", "Numeracy_%", "Bayesianreasoning_%",
"TotalScore_14", "TotalScore_%",]
df2 = df1[colReq].copy()
df2
Out[15]:
| ResponseId | age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ResponseId | |||||||||||||||||||
| R_1YkMM2lMB9aEuVL | 150 | R_1YkMM2lMB9aEuVL | 41 | 3 | 2 | 1 | 25000.0 | 2 | 1 | 1 | 2 | 0 | 66.666667 | 25.0 | 33.333333 | 66.666667 | 0.0 | 6 | 42.857143 |
| R_401xXJ9E3ZgU9gu | 96 | R_401xXJ9E3ZgU9gu | 33 | 2 | 2 | 2 | 16000000.0 | 1 | 1 | 3 | 1 | 1 | 33.333333 | 25.0 | 100.000000 | 33.333333 | 100.0 | 7 | 50.000000 |
| R_40N8bUn3C0jwGzf | 200 | R_40N8bUn3C0jwGzf | 18 | 2 | 2 | 1 | 80000.0 | 0 | 1 | 1 | 1 | 0 | 0.000000 | 25.0 | 33.333333 | 33.333333 | 0.0 | 3 | 21.428571 |
| R_40VyTmJ6i96wUP4 | 235 | R_40VyTmJ6i96wUP4 | 34 | 2 | 1 | 1 | 500000.0 | 2 | 0 | 1 | 2 | 0 | 66.666667 | 0.0 | 33.333333 | 66.666667 | 0.0 | 5 | 35.714286 |
| R_40ZEg7vX3Y7mMQl | 184 | R_40ZEg7vX3Y7mMQl | 31 | 2 | 2 | 1 | 500000.0 | 0 | 2 | 2 | 2 | 0 | 0.000000 | 50.0 | 66.666667 | 66.666667 | 0.0 | 6 | 42.857143 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| R_4yzFrFYMa7w8nQZ | 238 | R_4yzFrFYMa7w8nQZ | 18 | 2 | 3 | 1 | 90000.0 | 2 | 0 | 2 | 2 | 1 | 66.666667 | 0.0 | 66.666667 | 66.666667 | 100.0 | 7 | 50.000000 |
| R_4zPiSk23ayGvFD4 | 364 | R_4zPiSk23ayGvFD4 | 70 | 4 | 2 | 3 | 7000000.0 | 2 | 2 | 2 | 1 | 1 | 66.666667 | 50.0 | 66.666667 | 33.333333 | 100.0 | 8 | 57.142857 |
| R_4zdzjECYeO2A5Ox | 255 | R_4zdzjECYeO2A5Ox | 19 | 2 | 3 | 1 | 800000.0 | 1 | 4 | 3 | 1 | 0 | 33.333333 | 100.0 | 100.000000 | 33.333333 | 0.0 | 9 | 64.285714 |
| R_8k6D0jzzHCC5X3Z | 369 | R_8k6D0jzzHCC5X3Z | 25 | 2 | 2 | 6 | 8000000.0 | 2 | 1 | 1 | 1 | 1 | 66.666667 | 25.0 | 33.333333 | 33.333333 | 100.0 | 6 | 42.857143 |
| R_8taN3wUPbeZMxzM | 2 | R_8taN3wUPbeZMxzM | 20 | 2 | 2 | 1 | 10000.0 | 2 | 2 | 2 | 2 | 0 | 66.666667 | 50.0 | 66.666667 | 66.666667 | 0.0 | 8 | 57.142857 |
370 rows × 18 columns
In [16]:
# The following graph represents frequency of each data point on "TotalScore_14" or the total score out of 19 questions across the sample.
df2['TotalScore_14'].plot(kind = 'hist', xticks = np.arange(0, 15, step=1), xlabel = 'TotalScore_14', title = 'Frequency of Scores' )
Out[16]:
<Axes: title={'center': 'Frequency of Scores'}, xlabel='TotalScore_14', ylabel='Frequency'>
In [17]:
# Absolute mean scores for each facet
(df2[['Certainty_3','RiskComprehension_4','GraphLiteracy_3','Numeracy_3','Bayesianreasoning_1','TotalScore_14']].mean(axis = 0)).plot(kind = 'bar', title = 'Absolute mean of scores for above data set')
Out[17]:
<Axes: title={'center': 'Absolute mean of scores for above data set'}>
In [18]:
# Normalised mean scores for each facet
df2[['Certainty_%','RiskComprehension_%','GraphLiteracy_%','Numeracy_%','Bayesianreasoning_%','TotalScore_%']].mean(axis = 0).plot(kind = 'bar', title = 'Normalised mean of scores for above data set')
Out[18]:
<Axes: title={'center': 'Normalised mean of scores for above data set'}>
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [19]:
# DF3 = Sorted by income
df3 = df2.sort_values(by = 'income')
df3 = df3.reset_index(drop = True)
df3.drop(df3[df3['income'] == 7].index, inplace = True)
df3
Out[19]:
| ResponseId | age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R_1YkMM2lMB9aEuVL | 41 | 3 | 2 | 1 | 25000.0 | 2 | 1 | 1 | 2 | 0 | 66.666667 | 25.0 | 33.333333 | 66.666667 | 0.0 | 6 | 42.857143 |
| 1 | R_4TuAjTfgjSCm6mA | 29 | 2 | 2 | 1 | 5000000.0 | 0 | 0 | 3 | 2 | 0 | 0.000000 | 0.0 | 100.000000 | 66.666667 | 0.0 | 5 | 35.714286 |
| 2 | R_4U1OQvPwcT66nIZ | 31 | 2 | 2 | 1 | 3000000.0 | 1 | 3 | 3 | 1 | 0 | 33.333333 | 75.0 | 100.000000 | 33.333333 | 0.0 | 8 | 57.142857 |
| 3 | R_4UEtElZbFSSlXJ7 | 21 | 2 | 1 | 1 | 100000.0 | 2 | 2 | 3 | 2 | 0 | 66.666667 | 50.0 | 100.000000 | 66.666667 | 0.0 | 9 | 64.285714 |
| 4 | R_4UhYEPTA8MuBMxr | 23 | 2 | 2 | 1 | 20000000.0 | 1 | 1 | 2 | 2 | 0 | 33.333333 | 25.0 | 66.666667 | 66.666667 | 0.0 | 6 | 42.857143 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 365 | R_4gkipJeAyfFWtDH | 59 | 3 | 1 | 6 | 10000000.0 | 1 | 2 | 1 | 2 | 0 | 33.333333 | 50.0 | 33.333333 | 66.666667 | 0.0 | 6 | 42.857143 |
| 366 | R_48SQkLXmsHzNHIf | 25 | 2 | 2 | 6 | 100.0 | 2 | 1 | 1 | 2 | 0 | 66.666667 | 25.0 | 33.333333 | 66.666667 | 0.0 | 6 | 42.857143 |
| 367 | R_4aQfSYuVQoXAOIh | 30 | 2 | 3 | 6 | 50000.0 | 2 | 1 | 1 | 2 | 0 | 66.666667 | 25.0 | 33.333333 | 66.666667 | 0.0 | 6 | 42.857143 |
| 368 | R_4GeNwNOEgwD5rwL | 19 | 2 | 3 | 6 | 400000.0 | 2 | 2 | 1 | 1 | 0 | 66.666667 | 50.0 | 33.333333 | 33.333333 | 0.0 | 6 | 42.857143 |
| 369 | R_4rGgRh3Y7Grn2f5 | 20 | 2 | 2 | 6 | 500000.0 | 2 | 0 | 0 | 1 | 0 | 66.666667 | 0.0 | 0.000000 | 33.333333 | 0.0 | 3 | 21.428571 |
370 rows × 18 columns
In [20]:
# Absolute mean score for each facet (Sort by Income)
df3[['Certainty_3','RiskComprehension_4','GraphLiteracy_3','Numeracy_3','Bayesianreasoning_1','TotalScore_14']].mean(axis = 0).plot(kind = 'bar', title = 'Absolute mean score for each facet (Sort by Income)')
Out[20]:
<Axes: title={'center': 'Absolute mean score for each facet (Sort by Income)'}>
In [21]:
# Normalised mean score for each facet (Sort by Income)
(df3[['Certainty_%','RiskComprehension_%','GraphLiteracy_%','Numeracy_%','Bayesianreasoning_%','TotalScore_%']].mean(axis = 0)).plot(kind = 'bar', title = 'Normalised mean score for each facet (Sort by Income)')
Out[21]:
<Axes: title={'center': 'Normalised mean score for each facet (Sort by Income)'}>
In [22]:
# Normalised mean of each facet as a numerical (sorted by income)
df3.groupby('income')[['Certainty_%','RiskComprehension_%','GraphLiteracy_%','Numeracy_%','Bayesianreasoning_%','TotalScore_%']].mean()
Out[22]:
| Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_% | |
|---|---|---|---|---|---|---|
| income | ||||||
| 1 | 56.281407 | 46.231156 | 52.931323 | 48.073702 | 29.145729 | 48.994975 |
| 2 | 47.798742 | 47.641509 | 51.257862 | 47.169811 | 30.188679 | 47.102426 |
| 3 | 45.714286 | 35.714286 | 48.571429 | 49.523810 | 45.714286 | 44.285714 |
| 4 | 48.888889 | 46.666667 | 42.222222 | 44.444444 | 40.000000 | 45.238095 |
| 5 | 44.444444 | 33.333333 | 27.777778 | 44.444444 | 16.666667 | 35.714286 |
| 6 | 62.962963 | 30.555556 | 22.222222 | 55.555556 | 11.111111 | 39.682540 |
In [23]:
# Count of responses for each category (sorted by income)
df3.groupby('income')[['ResponseId']].count()
Out[23]:
| ResponseId | |
|---|---|
| income | |
| 1 | 199 |
| 2 | 106 |
| 3 | 35 |
| 4 | 15 |
| 5 | 6 |
| 6 | 9 |
In [24]:
# Absolute mean of Total Facet score line plot (sorted by income)
df3.groupby('income')[['TotalScore_14']].mean().plot( kind = 'line', title = 'Absolute mean of Total Facet score line plot (sorted by income)').legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[24]:
<matplotlib.legend.Legend at 0x2aa605a2f90>
In [25]:
# Absolute mean of each Facet score line plot (sorted by income)
df3.groupby('income')[['Certainty_3','RiskComprehension_4','GraphLiteracy_3','Numeracy_3','Bayesianreasoning_1']].mean().plot( kind = 'line', title = 'Absolute Facet scores mean line plot').legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[25]:
<matplotlib.legend.Legend at 0x2aa61682790>
In [26]:
# Normalised mean of each Facet score line plot (sorted by income)
df3.groupby('income')[['Certainty_%','RiskComprehension_%','GraphLiteracy_%','Numeracy_%','Bayesianreasoning_%', 'TotalScore_%']].mean().plot( kind = 'line', title = 'Normalised mean of each Facet score line plot (sorted by income)').legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[26]:
<matplotlib.legend.Legend at 0x2aa61740c50>
In [27]:
# Normalised mean of each Facet score stacked plot (sorted by income)
df3.groupby('income')[['Certainty_%','RiskComprehension_%','GraphLiteracy_%','Numeracy_%','Bayesianreasoning_%']].mean().plot( kind = 'bar', title = 'Normalised mean of each Facet score stacked plot (sorted by income)', stacked = True).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[27]:
<matplotlib.legend.Legend at 0x2aa60697690>
In [28]:
# Normalised mean of each Facet score hist plot (sorted by income)
df3.groupby('income')[['Certainty_%','RiskComprehension_%','GraphLiteracy_%','Numeracy_%','Bayesianreasoning_%','TotalScore_%']].mean().T.plot(kind = 'bar', title = 'Normalised mean scores for each facet for each wealth response category').legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[28]:
<matplotlib.legend.Legend at 0x2aa5ffd18d0>
In [29]:
# Trend line for Absolute Total Facet Score vs income
sns.regplot (data = df3, x = 'income', y = 'TotalScore_14')
Out[29]:
<Axes: xlabel='income', ylabel='TotalScore_14'>
In [30]:
# Trend line for Absolute Independent Facet Score vs income reponses
fig, ax = plt.subplots(figsize=(6, 6))
sns.regplot (data = df3, x = 'income', y = 'Certainty_3', fit_reg=True, ci=None, ax=ax, label='Certainty_3')
sns.regplot (data = df3, x = 'income', y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax, label='RiskComprehension_4')
sns.regplot (data = df3, x = 'income', y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax, label='Numeracy_3')
sns.regplot (data = df3, x = 'income', y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax, label='GraphLiteracy_3')
sns.regplot (data = df3, x = 'income', y = 'Bayesianreasoning_1',fit_reg=True, ci=None, ax=ax, label='Bayesianreasoning_1' )
ax.set(ylabel='Scores', xlabel='income')
ax.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [31]:
# Violine Plot for TotalScore_14 for each entry under income.
sns.violinplot( data = df3, x = 'income', y = 'TotalScore_14')
Out[31]:
<Axes: xlabel='income', ylabel='TotalScore_14'>
In [32]:
# Violine Plot for Certainty_3 for each entry under income.
sns.violinplot( data = df3, x = 'income', y = 'Certainty_3')
Out[32]:
<Axes: xlabel='income', ylabel='Certainty_3'>
In [33]:
# Violine Plot for RiskComprehension_4 for each entry under income.
sns.violinplot( data = df3, x = 'income', y = 'RiskComprehension_4')
Out[33]:
<Axes: xlabel='income', ylabel='RiskComprehension_4'>
In [34]:
# Violine Plot for GraphLiteracy_3 for each entry under income.
sns.violinplot( data = df3, x = 'income', y = 'GraphLiteracy_3')
Out[34]:
<Axes: xlabel='income', ylabel='GraphLiteracy_3'>
In [35]:
# Violine Plot for Numeracy_3 for each entry under income.
sns.violinplot( data = df3, x = 'income', y = 'Numeracy_3')
Out[35]:
<Axes: xlabel='income', ylabel='Numeracy_3'>
In [36]:
# Violine Plot for TotalScore_14 for each entry under income.
sns.violinplot( data = df3, x = 'income', y = 'Bayesianreasoning_1')
Out[36]:
<Axes: xlabel='income', ylabel='Bayesianreasoning_1'>
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [37]:
# Data Frame 2 or df2 is an aggregate data on facet total scores and over all total score, along with wealth and income data vs each response.
# This data set is also sorted by wealth with all responses 'NA' removed.
# We also assign quartiles ranking each response according to this sort.
df4 = df2.sort_values(by = 'wealth')
df4 = df4.reset_index(drop = True)
df4 = df4.dropna(axis = 0, subset = 'wealth')
df4.insert(loc = len(df4.columns), column = "Quartile Number", value = pd.qcut(df4["wealth"],q = 4, labels = False ) + 1, allow_duplicates = 'False')
df4
Out[37]:
| ResponseId | age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | Quartile Number | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R_4DPGUOAP86yO30R | 19 | 2 | 3 | 1 | 0.0 | 2 | 2 | 2 | 1 | 0 | 66.666667 | 50.0 | 66.666667 | 33.333333 | 0.0 | 7 | 50.000000 | 1 |
| 1 | R_4lnmGavSf6rw1eE | 31 | 2 | 1 | 1 | 0.0 | 0 | 3 | 1 | 1 | 0 | 0.000000 | 75.0 | 33.333333 | 33.333333 | 0.0 | 5 | 35.714286 | 1 |
| 2 | R_4MJAzsNYYA69Y8p | 27 | 2 | 3 | 1 | 0.0 | 2 | 1 | 1 | 2 | 1 | 66.666667 | 25.0 | 33.333333 | 66.666667 | 100.0 | 7 | 50.000000 | 1 |
| 3 | R_4fNO2Z5GF8KerZI | 24 | 2 | 2 | 2 | 0.0 | 3 | 3 | 1 | 2 | 1 | 100.000000 | 75.0 | 33.333333 | 66.666667 | 100.0 | 10 | 71.428571 | 1 |
| 4 | R_41bfnamM0zpH94i | 30 | 2 | 1 | 2 | 0.0 | 2 | 2 | 1 | 1 | 1 | 66.666667 | 50.0 | 33.333333 | 33.333333 | 100.0 | 7 | 50.000000 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 365 | R_4M4Yd7LGKym7BaF | 32 | 2 | 1 | 3 | 100000000.0 | 2 | 1 | 3 | 2 | 0 | 66.666667 | 25.0 | 100.000000 | 66.666667 | 0.0 | 8 | 57.142857 | 4 |
| 366 | R_4XgkSFVYD7YAAsS | 18 | 2 | 2 | 3 | 100000000.0 | 1 | 1 | 1 | 2 | 0 | 33.333333 | 25.0 | 33.333333 | 66.666667 | 0.0 | 5 | 35.714286 | 4 |
| 367 | R_4FZL5zwyR3HMcyl | 20 | 2 | 3 | 2 | 100000000.0 | 2 | 2 | 2 | 1 | 0 | 66.666667 | 50.0 | 66.666667 | 33.333333 | 0.0 | 7 | 50.000000 | 4 |
| 368 | R_4f2KZvIaLdZvVqN | 27 | 2 | 2 | 5 | 120000000.0 | 2 | 2 | 2 | 1 | 0 | 66.666667 | 50.0 | 66.666667 | 33.333333 | 0.0 | 7 | 50.000000 | 4 |
| 369 | R_4kHu85k7anbQck8 | 25 | 2 | 2 | 2 | 150000000.0 | 0 | 3 | 1 | 1 | 1 | 0.000000 | 75.0 | 33.333333 | 33.333333 | 100.0 | 6 | 42.857143 | 4 |
370 rows × 19 columns
In [38]:
# Absolute mean score for each facet (Sort by Wealth)
df4[['Certainty_3','RiskComprehension_4','Numeracy_3','GraphLiteracy_3','Bayesianreasoning_1','TotalScore_14']].mean(axis = 0).plot(kind = 'bar', title = 'Absolute mean score for each facet (Sort by Wealth)')
Out[38]:
<Axes: title={'center': 'Absolute mean score for each facet (Sort by Wealth)'}>
In [39]:
# Normalised mean score for each facet (Sort by Wealth)
(df4[['Certainty_%','RiskComprehension_%','Numeracy_%','GraphLiteracy_%','Bayesianreasoning_%','TotalScore_%']].mean(axis = 0)).plot(kind = 'bar', title = 'Normalised mean score for each facet (Sort by Wealth)')
Out[39]:
<Axes: title={'center': 'Normalised mean score for each facet (Sort by Wealth)'}>
In [40]:
# Line Plot of Absolute Total Facet score vs sorted wealth
df4.plot(use_index = 'True' , kind = 'line', y = ['TotalScore_14'], title = 'Total responses line plot sorted by Wealth',xlabel = 'Unique responses sorted by wealth', ylabel = 'TotalScore_14')
Out[40]:
<Axes: title={'center': 'Total responses line plot sorted by Wealth'}, xlabel='Unique responses sorted by wealth', ylabel='TotalScore_14'>
In [41]:
# Absolute Facet scores line plot from low to high wealth
# I do understand that this is non readable
df4[['Certainty_3','RiskComprehension_4','Numeracy_3','GraphLiteracy_3','Bayesianreasoning_1']].plot( kind = 'line', title = 'Absolute Facet scores line plot', use_index = True).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[41]:
<matplotlib.legend.Legend at 0x2aa60da2210>
In [42]:
# Trend line for Absolute Total Facet Score vs unique wealth reponses
sns.regplot (data = df4, x = df4.index, y = 'TotalScore_14')
Out[42]:
<Axes: ylabel='TotalScore_14'>
In [43]:
# Trend line for Absolute Independent Facet Score vs wealth reponses
fig1, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df4, x = df4.index, y = 'Certainty_3', fit_reg=True, ci=None, ax=ax1, label='Certainty_3')
sns.regplot (data = df4, x = df4.index, y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax1, label='RiskComprehension_4')
sns.regplot (data = df4, x = df4.index, y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax1, label='Numeracy_3')
sns.regplot (data = df4, x = df4.index, y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax1, label='GraphLiteracy_3')
sns.regplot (data = df4, x = df4.index, y = 'Bayesianreasoning_1',fit_reg=True, ci=None, ax=ax1, label='Bayesianreasoning_1' )
ax1.set(ylabel='Scores', xlabel='wealth')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [44]:
# Count of number of responses under each quartile
df4.groupby('Quartile Number')[['Quartile Number']].count()
Out[44]:
| Quartile Number | |
|---|---|
| Quartile Number | |
| 1 | 97 |
| 2 | 91 |
| 3 | 96 |
| 4 | 86 |
In [45]:
df4.loc[df4['Quartile Number']==1]
Out[45]:
| ResponseId | age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | Quartile Number | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R_4DPGUOAP86yO30R | 19 | 2 | 3 | 1 | 0.0 | 2 | 2 | 2 | 1 | 0 | 66.666667 | 50.0 | 66.666667 | 33.333333 | 0.0 | 7 | 50.000000 | 1 |
| 1 | R_4lnmGavSf6rw1eE | 31 | 2 | 1 | 1 | 0.0 | 0 | 3 | 1 | 1 | 0 | 0.000000 | 75.0 | 33.333333 | 33.333333 | 0.0 | 5 | 35.714286 | 1 |
| 2 | R_4MJAzsNYYA69Y8p | 27 | 2 | 3 | 1 | 0.0 | 2 | 1 | 1 | 2 | 1 | 66.666667 | 25.0 | 33.333333 | 66.666667 | 100.0 | 7 | 50.000000 | 1 |
| 3 | R_4fNO2Z5GF8KerZI | 24 | 2 | 2 | 2 | 0.0 | 3 | 3 | 1 | 2 | 1 | 100.000000 | 75.0 | 33.333333 | 66.666667 | 100.0 | 10 | 71.428571 | 1 |
| 4 | R_41bfnamM0zpH94i | 30 | 2 | 1 | 2 | 0.0 | 2 | 2 | 1 | 1 | 1 | 66.666667 | 50.0 | 33.333333 | 33.333333 | 100.0 | 7 | 50.000000 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 92 | R_49sPXSg2MnDTzkR | 21 | 2 | 2 | 1 | 100000.0 | 1 | 4 | 3 | 2 | 1 | 33.333333 | 100.0 | 100.000000 | 66.666667 | 100.0 | 11 | 78.571429 | 1 |
| 93 | R_49ucUf021OosPGQ | 35 | 2 | 1 | 1 | 100000.0 | 1 | 3 | 3 | 1 | 0 | 33.333333 | 75.0 | 100.000000 | 33.333333 | 0.0 | 8 | 57.142857 | 1 |
| 94 | R_4A9Iv5lYYQeK969 | 36 | 3 | 1 | 2 | 100000.0 | 2 | 1 | 1 | 1 | 0 | 66.666667 | 25.0 | 33.333333 | 33.333333 | 0.0 | 5 | 35.714286 | 1 |
| 95 | R_4CsvgqFBLfzgRSL | 43 | 3 | 1 | 1 | 100000.0 | 2 | 2 | 2 | 1 | 0 | 66.666667 | 50.0 | 66.666667 | 33.333333 | 0.0 | 7 | 50.000000 | 1 |
| 96 | R_4CCuDlH0lEcMARo | 23 | 2 | 2 | 1 | 100000.0 | 0 | 2 | 3 | 1 | 0 | 0.000000 | 50.0 | 100.000000 | 33.333333 | 0.0 | 6 | 42.857143 | 1 |
97 rows × 19 columns
In [46]:
# This graph is basically a line plot of the TotalScore_14 across first quartile unique responses which are ordered by income
df4.loc[df4['Quartile Number']==1].plot(use_index = 'True' , kind = 'line', y = ['TotalScore_14'], title = 'Total Score line plot sorted by Wealth Q1',xlabel = 'Unique responses', ylabel = 'TotalScore_14')
Out[46]:
<Axes: title={'center': 'Total Score line plot sorted by Wealth Q1'}, xlabel='Unique responses', ylabel='TotalScore_14'>
In [47]:
# Trend line for Absolute Total Facet Score vs unique Q14. Income reponses in Quartile 1
sns.regplot (data = df4.loc[df4['Quartile Number']==1], x = df4.loc[df4['Quartile Number']==1].index, y = 'TotalScore_14')
Out[47]:
<Axes: ylabel='TotalScore_14'>
In [48]:
# Trend line for Absolute Independent Facet Score vs wealth reponses in Quartile 1
fig1, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df4.loc[df4['Quartile Number']==1], x = df4.loc[df4['Quartile Number']==1].index, y = 'Certainty_3', fit_reg=True, ci=None, ax=ax2, label='Certainty_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==1], x = df4.loc[df4['Quartile Number']==1].index, y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax2, label='RiskComprehension_4')
sns.regplot (data = df4.loc[df4['Quartile Number']==1], x = df4.loc[df4['Quartile Number']==1].index, y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax2, label='Numeracy_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==1], x = df4.loc[df4['Quartile Number']==1].index, y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax2, label='GraphLiteracy_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==1], x = df4.loc[df4['Quartile Number']==1].index, y = 'Bayesianreasoning_1',fit_reg=True, ci=None, ax=ax2, label='Bayesianreasoning_1' )
ax2.set(ylabel='Scores', xlabel='wealth')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [49]:
df4.loc[df4['Quartile Number']==2]
Out[49]:
| ResponseId | age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | Quartile Number | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 97 | R_4s4QJ9m9ViKLDPf | 19 | 2 | 3 | 1 | 101000.0 | 1 | 0 | 1 | 2 | 0 | 33.333333 | 0.0 | 33.333333 | 66.666667 | 0.0 | 4 | 28.571429 | 2 |
| 98 | R_4qh6umVAgcFqF0h | 22 | 2 | 2 | 1 | 112378.0 | 2 | 3 | 3 | 1 | 0 | 66.666667 | 75.0 | 100.000000 | 33.333333 | 0.0 | 9 | 64.285714 | 2 |
| 99 | R_4kN4Jn7Y8hHdzlY | 23 | 2 | 2 | 1 | 120000.0 | 2 | 0 | 2 | 1 | 0 | 66.666667 | 0.0 | 66.666667 | 33.333333 | 0.0 | 5 | 35.714286 | 2 |
| 100 | R_437fyX4DcVGhqJe | 40 | 3 | 1 | 2 | 150000.0 | 2 | 2 | 1 | 1 | 0 | 66.666667 | 50.0 | 33.333333 | 33.333333 | 0.0 | 6 | 42.857143 | 2 |
| 101 | R_4PFudUMcC09ZJk8 | 36 | 3 | 2 | 2 | 150000.0 | 2 | 3 | 3 | 1 | 0 | 66.666667 | 75.0 | 100.000000 | 33.333333 | 0.0 | 9 | 64.285714 | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 183 | R_4rGgRh3Y7Grn2f5 | 20 | 2 | 2 | 6 | 500000.0 | 2 | 0 | 0 | 1 | 0 | 66.666667 | 0.0 | 0.000000 | 33.333333 | 0.0 | 3 | 21.428571 | 2 |
| 184 | R_40VyTmJ6i96wUP4 | 34 | 2 | 1 | 1 | 500000.0 | 2 | 0 | 1 | 2 | 0 | 66.666667 | 0.0 | 33.333333 | 66.666667 | 0.0 | 5 | 35.714286 | 2 |
| 185 | R_4eUXMOw0QY7ovqu | 24 | 2 | 2 | 3 | 500000.0 | 0 | 2 | 1 | 2 | 1 | 0.000000 | 50.0 | 33.333333 | 66.666667 | 100.0 | 6 | 42.857143 | 2 |
| 186 | R_4C2WItrJsoTF1cE | 19 | 2 | 2 | 1 | 500000.0 | 2 | 4 | 1 | 2 | 1 | 66.666667 | 100.0 | 33.333333 | 66.666667 | 100.0 | 10 | 71.428571 | 2 |
| 187 | R_4wdhk4DDjmecH1A | 29 | 2 | 2 | 2 | 500000.0 | 0 | 1 | 2 | 1 | 1 | 0.000000 | 25.0 | 66.666667 | 33.333333 | 100.0 | 5 | 35.714286 | 2 |
91 rows × 19 columns
In [50]:
# This graph is basically a line plot of the TotalScore_14 across second quartile unique responses which are ordered by income
df4.loc[df4['Quartile Number']==2].plot(use_index = 'True' , kind = 'line', y = ['TotalScore_14'], title = 'Total Score line plot sorted by Wealth Q1',xlabel = 'Unique responses', ylabel = 'TotalScore_14')
Out[50]:
<Axes: title={'center': 'Total Score line plot sorted by Wealth Q1'}, xlabel='Unique responses', ylabel='TotalScore_14'>
In [51]:
# Trend line for Absolute Total Facet Score vs unique wealth reponses in Quartile 2
sns.regplot (data = df4.loc[df4['Quartile Number']==2], x = df4.loc[df4['Quartile Number']==2].index, y = 'TotalScore_14')
Out[51]:
<Axes: ylabel='TotalScore_14'>
In [52]:
# Trend line for Absolute Independent Facet Score vs wealth reponses in Quartile 2
fig1, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df4.loc[df4['Quartile Number']==2], x = df4.loc[df4['Quartile Number']==2].index, y = 'Certainty_3', fit_reg=True, ci=None, ax=ax3, label='Certainty_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==2], x = df4.loc[df4['Quartile Number']==2].index, y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax3, label='RiskComprehension_4')
sns.regplot (data = df4.loc[df4['Quartile Number']==2], x = df4.loc[df4['Quartile Number']==2].index, y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax3, label='Numeracy_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==2], x = df4.loc[df4['Quartile Number']==2].index, y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax3, label='GraphLiteracy_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==2], x = df4.loc[df4['Quartile Number']==2].index, y = 'Bayesianreasoning_1',fit_reg=True, ci=None, ax=ax3, label='Bayesianreasoning_1' )
ax3.set(ylabel='Scores', xlabel='wealth')
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [53]:
df4.loc[df4['Quartile Number']==3]
Out[53]:
| ResponseId | age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | Quartile Number | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 188 | R_4MbebluzsBWokIc | 57 | 3 | 2 | 2 | 550000.0 | 1 | 3 | 2 | 1 | 0 | 33.333333 | 75.0 | 66.666667 | 33.333333 | 0.0 | 7 | 50.000000 | 3 |
| 189 | R_4TuHdn6ifelyTqF | 42 | 3 | 2 | 2 | 600000.0 | 2 | 4 | 2 | 1 | 1 | 66.666667 | 100.0 | 66.666667 | 33.333333 | 100.0 | 10 | 71.428571 | 3 |
| 190 | R_4Issx8w7LuZg7Yt | 28 | 2 | 2 | 2 | 600000.0 | 2 | 2 | 1 | 2 | 0 | 66.666667 | 50.0 | 33.333333 | 66.666667 | 0.0 | 7 | 50.000000 | 3 |
| 191 | R_4jTmQ2fyl4GIZKf | 24 | 2 | 2 | 1 | 600000.0 | 2 | 2 | 2 | 1 | 0 | 66.666667 | 50.0 | 66.666667 | 33.333333 | 0.0 | 7 | 50.000000 | 3 |
| 192 | R_4EpvHZs1AkqudDX | 24 | 2 | 2 | 1 | 600000.0 | 2 | 3 | 3 | 1 | 1 | 66.666667 | 75.0 | 100.000000 | 33.333333 | 100.0 | 10 | 71.428571 | 3 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 279 | R_4IXCBojUC4PBdve | 51 | 3 | 2 | 2 | 3000000.0 | 2 | 2 | 3 | 2 | 1 | 66.666667 | 50.0 | 100.000000 | 66.666667 | 100.0 | 10 | 71.428571 | 3 |
| 280 | R_4EcDVJCgRS2diGg | 32 | 2 | 1 | 5 | 3000000.0 | 0 | 2 | 2 | 1 | 1 | 0.000000 | 50.0 | 66.666667 | 33.333333 | 100.0 | 6 | 42.857143 | 3 |
| 281 | R_41hdf8aceebqOfl | 25 | 2 | 2 | 2 | 3000000.0 | 2 | 3 | 1 | 2 | 0 | 66.666667 | 75.0 | 33.333333 | 66.666667 | 0.0 | 8 | 57.142857 | 3 |
| 282 | R_4EneslabJXcCOT4 | 18 | 2 | 3 | 1 | 3000000.0 | 2 | 3 | 2 | 1 | 0 | 66.666667 | 75.0 | 66.666667 | 33.333333 | 0.0 | 8 | 57.142857 | 3 |
| 283 | R_4pPBZhFSJIbPPha | 45 | 3 | 1 | 3 | 3000000.0 | 0 | 3 | 2 | 1 | 1 | 0.000000 | 75.0 | 66.666667 | 33.333333 | 100.0 | 7 | 50.000000 | 3 |
96 rows × 19 columns
In [54]:
# This graph is basically a line plot of the TotalScore_14 across third quartile unique responses which are ordered by wealth
df4.loc[df4['Quartile Number']==3].plot(use_index = 'True' , kind = 'line', y = ['TotalScore_14'], title = 'Total Score line plot sorted by Wealth Q3',xlabel = 'Unique responses', ylabel = 'TotalScore_14')
Out[54]:
<Axes: title={'center': 'Total Score line plot sorted by Wealth Q3'}, xlabel='Unique responses', ylabel='TotalScore_14'>
In [55]:
# Trend line for Absolute Total Facet Score vs unique wealth reponses in Quartile 3
sns.regplot (data = df4.loc[df4['Quartile Number']==3], x = df4.loc[df4['Quartile Number']==3].index, y = 'TotalScore_14')
Out[55]:
<Axes: ylabel='TotalScore_14'>
In [56]:
# Trend line for Absolute Independent Facet Score vs wealth reponses in Quartile 3
fig1, ax4 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df4.loc[df4['Quartile Number']==3], x = df4.loc[df4['Quartile Number']==3].index, y = 'Certainty_3', fit_reg=True, ci=None, ax=ax4, label='Certainty_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==3], x = df4.loc[df4['Quartile Number']==3].index, y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax4, label='RiskComprehension_4')
sns.regplot (data = df4.loc[df4['Quartile Number']==3], x = df4.loc[df4['Quartile Number']==3].index, y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax4, label='Numeracy_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==3], x = df4.loc[df4['Quartile Number']==3].index, y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax4, label='GraphLiteracy_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==3], x = df4.loc[df4['Quartile Number']==3].index, y = 'Bayesianreasoning_1',fit_reg=True, ci=None, ax=ax4, label='Bayesianreasoning_1' )
ax4.set(ylabel='Scores', xlabel='wealth')
ax4.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [57]:
df4.loc[df4['Quartile Number']==4]
Out[57]:
| ResponseId | age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | Quartile Number | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 284 | R_4FVQvDpbnyvqOQY | 28 | 2 | 1 | 2 | 3200000.0 | 0 | 1 | 0 | 2 | 0 | 0.000000 | 25.0 | 0.000000 | 66.666667 | 0.0 | 3 | 21.428571 | 4 |
| 285 | R_4dyDxqwOf6c5bUt | 46 | 3 | 1 | 2 | 4000000.0 | 2 | 3 | 2 | 1 | 0 | 66.666667 | 75.0 | 66.666667 | 33.333333 | 0.0 | 8 | 57.142857 | 4 |
| 286 | R_4w1ruZDnw0ZQRxf | 22 | 2 | 3 | 4 | 4000000.0 | 0 | 1 | 1 | 2 | 1 | 0.000000 | 25.0 | 33.333333 | 66.666667 | 100.0 | 5 | 35.714286 | 4 |
| 287 | R_4Rft74IKSrfwItv | 24 | 2 | 1 | 1 | 4000000.0 | 1 | 4 | 2 | 2 | 1 | 33.333333 | 100.0 | 66.666667 | 66.666667 | 100.0 | 10 | 71.428571 | 4 |
| 288 | R_4NERL55GtGXfKj7 | 40 | 3 | 2 | 1 | 4000000.0 | 2 | 1 | 2 | 2 | 1 | 66.666667 | 25.0 | 66.666667 | 66.666667 | 100.0 | 8 | 57.142857 | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 365 | R_4M4Yd7LGKym7BaF | 32 | 2 | 1 | 3 | 100000000.0 | 2 | 1 | 3 | 2 | 0 | 66.666667 | 25.0 | 100.000000 | 66.666667 | 0.0 | 8 | 57.142857 | 4 |
| 366 | R_4XgkSFVYD7YAAsS | 18 | 2 | 2 | 3 | 100000000.0 | 1 | 1 | 1 | 2 | 0 | 33.333333 | 25.0 | 33.333333 | 66.666667 | 0.0 | 5 | 35.714286 | 4 |
| 367 | R_4FZL5zwyR3HMcyl | 20 | 2 | 3 | 2 | 100000000.0 | 2 | 2 | 2 | 1 | 0 | 66.666667 | 50.0 | 66.666667 | 33.333333 | 0.0 | 7 | 50.000000 | 4 |
| 368 | R_4f2KZvIaLdZvVqN | 27 | 2 | 2 | 5 | 120000000.0 | 2 | 2 | 2 | 1 | 0 | 66.666667 | 50.0 | 66.666667 | 33.333333 | 0.0 | 7 | 50.000000 | 4 |
| 369 | R_4kHu85k7anbQck8 | 25 | 2 | 2 | 2 | 150000000.0 | 0 | 3 | 1 | 1 | 1 | 0.000000 | 75.0 | 33.333333 | 33.333333 | 100.0 | 6 | 42.857143 | 4 |
86 rows × 19 columns
In [58]:
# This graph is basically a line plot of the TotalScore_14 across fourth quartile unique responses which are ordered by Wealth
df4.loc[df4['Quartile Number']==4].plot(use_index = 'True' , kind = 'line', y = ['TotalScore_14'], title = 'Total Score line plot sorted by Wealth Q4',xlabel = 'Unique responses', ylabel = 'TotalScore_14')
Out[58]:
<Axes: title={'center': 'Total Score line plot sorted by Wealth Q4'}, xlabel='Unique responses', ylabel='TotalScore_14'>
In [59]:
# Trend line for Absolute Total Facet Score vs unique wealth reponses in Quartile 4
sns.regplot (data = df4.loc[df4['Quartile Number']==4], x = df4.loc[df4['Quartile Number']==4].index, y = 'TotalScore_14')
Out[59]:
<Axes: ylabel='TotalScore_14'>
In [60]:
# Trend line for Absolute Independent Facet Score vs wealth reponses in Quartile 4
fig1, ax5 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df4.loc[df4['Quartile Number']==4], x = df4.loc[df4['Quartile Number']==4].index, y = 'Certainty_3', fit_reg=True, ci=None, ax=ax5, label='Certainty_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==4], x = df4.loc[df4['Quartile Number']==4].index, y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax5, label='RiskComprehension_4')
sns.regplot (data = df4.loc[df4['Quartile Number']==4], x = df4.loc[df4['Quartile Number']==4].index, y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax5, label='Numeracy_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==4], x = df4.loc[df4['Quartile Number']==4].index, y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax5, label='GraphLiteracy_3')
sns.regplot (data = df4.loc[df4['Quartile Number']==4], x = df4.loc[df4['Quartile Number']==4].index, y = 'Bayesianreasoning_1',fit_reg=True, ci=None, ax=ax5, label='Bayesianreasoning_1' )
ax5.set(ylabel='Scores', xlabel='wealth')
ax5.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [ ]:
In [61]:
# Absolute Total Facet scores mean line plot from low to high Wealth
df4.groupby('Quartile Number')[['TotalScore_14']].mean().plot( kind = 'line', title = 'Absolute Total Facet scores mean line plot low to high wealth', xticks = np.arange(1,5,step = 1)).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[61]:
<matplotlib.legend.Legend at 0x2aa6018aa90>
In [62]:
# Absolute Facet scores mean line plot from low to high wealth
df4.groupby('Quartile Number')[['Certainty_3','RiskComprehension_4','Numeracy_3','GraphLiteracy_3','Bayesianreasoning_1']].mean().plot( kind = 'line', title = 'Absolute Facet scores mean line plot low to high wealth', xticks = np.arange(1,5,step = 1)).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[62]:
<matplotlib.legend.Legend at 0x2aa60ce4d50>
In [63]:
# Normalised Facet scores mean line plot from low to high wealth
df4.groupby('Quartile Number')[['Certainty_%','RiskComprehension_%','Numeracy_%','GraphLiteracy_%','Bayesianreasoning_%','TotalScore_%']].mean().plot( kind = 'line', title = 'Normalised Facet scores mean line plot from low to high wealth', xticks = np.arange(1,5,step = 1)).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[63]:
<matplotlib.legend.Legend at 0x2aa60343190>
In [64]:
# Normalised mean scores for each facet stacked
df4.groupby('Quartile Number')[['Certainty_%','RiskComprehension_%','Numeracy_%','GraphLiteracy_%','Bayesianreasoning_%']].mean().plot( kind = 'bar', title = 'Normalised mean scores for each facet stacked', stacked = True).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[64]:
<matplotlib.legend.Legend at 0x2aa60307310>
In [65]:
# Normalised mean scores for each facet for each wealth response Quartile
df4.groupby('Quartile Number')[['Certainty_%','RiskComprehension_%','Numeracy_%','GraphLiteracy_%','Bayesianreasoning_%','TotalScore_%']].mean().T.plot(kind = 'bar', title = 'Normalised mean scores for each facet for each wealth response category').legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[65]:
<matplotlib.legend.Legend at 0x2aa602fffd0>
In [66]:
# Trend line for Absolute Total Facet Score vs Quartiles
sns.regplot (data = df4, x = 'Quartile Number', y = 'TotalScore_14')
Out[66]:
<Axes: xlabel='Quartile Number', ylabel='TotalScore_14'>
In [67]:
# Trend line for Absolute Independent Facet Score vs Quartiles
fig2, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df4, x = 'Quartile Number', y = 'Certainty_3', fit_reg=True, ci=None, ax=ax2, label='Certainty_3')
sns.regplot (data = df4, x = 'Quartile Number', y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax2, label='RiskComprehension_4')
sns.regplot (data = df4, x = 'Quartile Number', y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax2, label='Numeracy_3')
sns.regplot (data = df4, x = 'Quartile Number', y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax2, label='GraphLiteracy_3')
sns.regplot (data = df4, x = 'Quartile Number', y = 'Bayesianreasoning_1',fit_reg=True, ci=None, ax=ax2, label='Bayesianreasoning_1' )
ax2.set(ylabel='Scores', xlabel='wealth')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [68]:
# Violine Plot for TotalScore_14 for each entry under Wealth Quartiles.
sns.violinplot( data = df4, x = 'Quartile Number', y = 'TotalScore_14')
Out[68]:
<Axes: xlabel='Quartile Number', ylabel='TotalScore_14'>
In [69]:
# Violine Plot for Certainty_3 for each entry under Wealth Quartiles.
sns.violinplot( data = df4, x = 'Quartile Number', y = 'Certainty_3')
Out[69]:
<Axes: xlabel='Quartile Number', ylabel='Certainty_3'>
In [70]:
# Violine Plot for RiskComprehension_4 for each entry under Wealth Quartiles.
sns.violinplot( data = df4, x = 'Quartile Number', y = 'RiskComprehension_4')
Out[70]:
<Axes: xlabel='Quartile Number', ylabel='RiskComprehension_4'>
In [71]:
# Violine Plot for GraphLiteracy_3 for each entry under Wealth Quartiles.
sns.violinplot( data = df4, x = 'Quartile Number', y = 'GraphLiteracy_3')
Out[71]:
<Axes: xlabel='Quartile Number', ylabel='GraphLiteracy_3'>
In [72]:
# Violine Plot for Numeracy_3 for each entry under Wealth Quartiles.
sns.violinplot( data = df4, x = 'Quartile Number', y = 'Numeracy_3')
Out[72]:
<Axes: xlabel='Quartile Number', ylabel='Numeracy_3'>
In [73]:
# Violine Plot for Bayesianreasoning_1 for each entry under Wealth Quartiles.
sns.violinplot( data = df4, x = 'Quartile Number', y = 'Bayesianreasoning_1')
Out[73]:
<Axes: xlabel='Quartile Number', ylabel='Bayesianreasoning_1'>
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [74]:
# Data Frame 5 or df5 is an aggregate data on facet total scores and over all total score, along with education data vs each response.
# This data set is now sorted by education.
df5 = df2.sort_values(by = 'isced')
df5 = df5.reset_index(drop = True)
df5
Out[74]:
| ResponseId | age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R_4XhfVqNhg49zZXe | 37 | 3 | 1 | 2 | 6000000.0 | 2 | 0 | 1 | 2 | 0 | 66.666667 | 0.0 | 33.333333 | 66.666667 | 0.0 | 5 | 35.714286 |
| 1 | R_4FjN0R9SXA0s0G5 | 53 | 3 | 1 | 2 | 5000000.0 | 2 | 2 | 1 | 1 | 1 | 66.666667 | 50.0 | 33.333333 | 33.333333 | 100.0 | 7 | 50.000000 |
| 2 | R_4Ph1UNtYnaoPSX7 | 25 | 2 | 1 | 3 | 400000.0 | 0 | 2 | 0 | 1 | 0 | 0.000000 | 50.0 | 0.000000 | 33.333333 | 0.0 | 3 | 21.428571 |
| 3 | R_4FVQvDpbnyvqOQY | 28 | 2 | 1 | 2 | 3200000.0 | 0 | 1 | 0 | 2 | 0 | 0.000000 | 25.0 | 0.000000 | 66.666667 | 0.0 | 3 | 21.428571 |
| 4 | R_4PjnTbc2vIjogot | 31 | 2 | 1 | 1 | 200000.0 | 2 | 1 | 0 | 1 | 0 | 66.666667 | 25.0 | 0.000000 | 33.333333 | 0.0 | 4 | 28.571429 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 365 | R_4fkCPjfYl6UggHL | 18 | 2 | 3 | 1 | 1000000.0 | 2 | 3 | 1 | 2 | 1 | 66.666667 | 75.0 | 33.333333 | 66.666667 | 100.0 | 9 | 64.285714 |
| 366 | R_4K6WL8dnMy7ODbs | 18 | 2 | 3 | 6 | 100000.0 | 2 | 0 | 1 | 2 | 0 | 66.666667 | 0.0 | 33.333333 | 66.666667 | 0.0 | 5 | 35.714286 |
| 367 | R_4ANgmQQx8TBl3kc | 21 | 2 | 3 | 1 | 16000000.0 | 0 | 4 | 2 | 1 | 1 | 0.000000 | 100.0 | 66.666667 | 33.333333 | 100.0 | 8 | 57.142857 |
| 368 | R_4MJAzsNYYA69Y8p | 27 | 2 | 3 | 1 | 0.0 | 2 | 1 | 1 | 2 | 1 | 66.666667 | 25.0 | 33.333333 | 66.666667 | 100.0 | 7 | 50.000000 |
| 369 | R_4EneslabJXcCOT4 | 18 | 2 | 3 | 1 | 3000000.0 | 2 | 3 | 2 | 1 | 0 | 66.666667 | 75.0 | 66.666667 | 33.333333 | 0.0 | 8 | 57.142857 |
370 rows × 18 columns
In [75]:
# since we didnt drop any values, the dataset is essentially the same as df2, but sorted
In [76]:
# Mean of each facet as a % for each category of isced
df5.groupby('isced')[['Certainty_%','RiskComprehension_%','Numeracy_%','GraphLiteracy_%','Bayesianreasoning_%']].mean()
Out[76]:
| Certainty_% | RiskComprehension_% | Numeracy_% | GraphLiteracy_% | Bayesianreasoning_% | |
|---|---|---|---|---|---|
| isced | |||||
| 1 | 51.851852 | 44.444444 | 48.677249 | 49.470899 | 28.571429 |
| 2 | 51.741294 | 46.268657 | 47.097844 | 51.741294 | 31.840796 |
| 3 | 58.139535 | 41.279070 | 49.612403 | 47.286822 | 32.558140 |
In [77]:
# Count of responses for each category isced
df5.groupby('isced')[['ResponseId']].count()
Out[77]:
| ResponseId | |
|---|---|
| isced | |
| 1 | 126 |
| 2 | 201 |
| 3 | 43 |
In [78]:
# Absolute Total Facet scores mean line plot from low to high education
df5.groupby('isced')[['TotalScore_14']].mean().plot( kind = 'line', title = 'Absolute Total Facet scores mean line plot', xticks = np.arange(1,4, step = 1)).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[78]:
<matplotlib.legend.Legend at 0x2aa60e82c90>
In [79]:
# Absolute Facet scores mean line plot from low to high edu
df5.groupby('isced')[['Certainty_3','RiskComprehension_4','Numeracy_3','GraphLiteracy_3','Bayesianreasoning_1']].mean().plot( kind = 'line', title = 'Absolute Facet scores mean line plot', xticks = np.arange(1,4, step = 1)).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[79]:
<matplotlib.legend.Legend at 0x2aa67800950>
In [80]:
# Normalised Facet scores mean line plot from low to high edu
df5.groupby('isced')[['Certainty_%','RiskComprehension_%','Numeracy_%','GraphLiteracy_%','Bayesianreasoning_%','TotalScore_%']].mean().plot( kind = 'line', title = 'Normalised Facet scores mean line plot', xticks = np.arange(1,4, step = 1)).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[80]:
<matplotlib.legend.Legend at 0x2aa67803110>
In [81]:
# Normalised mean scores for each facet stacked
df5.groupby('isced')[['Certainty_%','RiskComprehension_%','Numeracy_%','GraphLiteracy_%','Bayesianreasoning_%']].mean().plot( kind = 'bar', title = 'Normalised mean scores for each facet stacked', stacked = True).legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[81]:
<matplotlib.legend.Legend at 0x2aa6589a790>
In [82]:
# Normalised mean scores for each facet for each edu response category
df5.groupby('isced')[['Certainty_%','RiskComprehension_%','Numeracy_%','GraphLiteracy_%','Bayesianreasoning_%','TotalScore_%']].mean().T.plot(kind = 'bar', title = 'Normalised mean scores for each facet for each edu response category').legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
Out[82]:
<matplotlib.legend.Legend at 0x2aa658d0b90>
In [83]:
# Trend line for Absolute Total Facet Score vs edu (isced) reponses
sns.regplot (data = df5, x = 'isced', y = 'TotalScore_14')
Out[83]:
<Axes: xlabel='isced', ylabel='TotalScore_14'>
In [84]:
# Trend line for Absolute Independent Facet Score vs edu (isced) reponses
fig, ax6 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df5, x = 'isced', y = 'Certainty_3', fit_reg=True, ci=None, ax=ax6, label='Certainty_3')
sns.regplot (data = df5, x = 'isced', y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax6, label='RiskComprehension_4')
sns.regplot (data = df5, x = 'isced', y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax6, label='Numeracy_3')
sns.regplot (data = df5, x = 'isced', y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax6, label='GraphLiteracy_3')
sns.regplot (data = df5, x = 'isced', y = 'Bayesianreasoning_1',fit_reg=True, ci=None, ax=ax6, label='Bayesianreasoning_1' )
ax6.set(ylabel='Scores', xlabel='isced')
ax6.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [85]:
# Violine Plot for TotalScore_14 for each entry under edu.
sns.violinplot( data = df5, x = 'isced', y = 'TotalScore_14')
Out[85]:
<Axes: xlabel='isced', ylabel='TotalScore_14'>
In [86]:
# Violine Plot for Certainty_3 for each entry under edu.
sns.violinplot( data = df5, x = 'isced', y = 'Certainty_3')
Out[86]:
<Axes: xlabel='isced', ylabel='Certainty_3'>
In [87]:
# Violine Plot for RiskComprehension_4 for each entry under edu.
sns.violinplot( data = df5, x = 'isced', y = 'RiskComprehension_4')
Out[87]:
<Axes: xlabel='isced', ylabel='RiskComprehension_4'>
In [88]:
# Violine Plot for GraphLiteracy_3 for each entry under edu.
sns.violinplot( data = df5, x = 'isced', y = 'GraphLiteracy_3')
Out[88]:
<Axes: xlabel='isced', ylabel='GraphLiteracy_3'>
In [89]:
# Violine Plot for Numeracy_3 for each entry under edu.
sns.violinplot( data = df5, x = 'isced', y = 'Numeracy_3')
Out[89]:
<Axes: xlabel='isced', ylabel='Numeracy_3'>
In [90]:
# Violine Plot for TotalScore_14 for each entry under edu.
sns.violinplot( data = df5, x = 'isced', y = 'Bayesianreasoning_1')
Out[90]:
<Axes: xlabel='isced', ylabel='Bayesianreasoning_1'>
In [91]:
# Descriptive stats for the data set, isced = 1
# NA values of wealth are removed
df6 = df5.dropna(axis = 0, subset = 'wealth')
# df6.drop(df6[df6['wealth'] == 3500000].index, inplace = True)
df6.loc[df6['isced']==1][['wealth']].describe()
Out[91]:
| wealth | |
|---|---|
| count | 1.260000e+02 |
| mean | 4.726376e+06 |
| std | 1.304076e+07 |
| min | 0.000000e+00 |
| 25% | 2.000000e+05 |
| 50% | 1.000000e+06 |
| 75% | 4.000000e+06 |
| max | 1.000000e+08 |
In [92]:
# Descriptive stats for the data set, isced = 2
df6.loc[df6['isced']==2][['wealth']].describe()
Out[92]:
| wealth | |
|---|---|
| count | 2.010000e+02 |
| mean | 5.055470e+06 |
| std | 1.647852e+07 |
| min | 0.000000e+00 |
| 25% | 1.000000e+05 |
| 50% | 5.000000e+05 |
| 75% | 2.500000e+06 |
| max | 1.500000e+08 |
In [93]:
# Descriptive stats for the data set, isced = 3
df6.loc[df6['isced']==3][['wealth']].describe()
Out[93]:
| wealth | |
|---|---|
| count | 4.300000e+01 |
| mean | 4.133750e+06 |
| std | 1.532675e+07 |
| min | 0.000000e+00 |
| 25% | 7.000000e+04 |
| 50% | 4.000000e+05 |
| 75% | 1.750000e+06 |
| max | 1.000000e+08 |
In [94]:
# Trend line for Absolute Tota Facet Score vs wealth reponses sorted by isced and ORDERED by wealth WITH scatter
fig, ax7 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['wealth'], y = 'TotalScore_14', fit_reg=True, ci=None, ax=ax7, label='ISCED = 1')
sns.regplot (data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['wealth'], y = 'TotalScore_14', fit_reg=True, ci=None, ax=ax7, label='ISCED = 2')
sns.regplot (data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['wealth'], y = 'TotalScore_14', fit_reg=True, ci=None, ax=ax7, label='ISCED = 3')
ax7.set(ylabel='Total Scores_19', xlabel='Wealth')
ax7.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [95]:
# Trend line for Absolute Tota Facet Score vs wealth reponses sorted by isced and ORDERED by wealth WITHOUT scatter
fig, ax8 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['wealth'], y = 'TotalScore_14', fit_reg=True, ci=None, ax=ax8, label='ISCED = 1')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['wealth'], y = 'TotalScore_14', fit_reg=True, ci=None, ax=ax8, label='ISCED = 2')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['wealth'], y = 'TotalScore_14', fit_reg=True, ci=None, ax=ax8, label='ISCED = 3')
ax8.set(ylabel='Total Scores_19', xlabel='Wealth')
ax8.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [96]:
# Trend line for Absolute Certainty Score vs wealth reponses sorted by isced and ORDERED by wealth WITHOUT scatter
fig, ax9 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['wealth'], y = 'Certainty_3', fit_reg=True, ci=None, ax=ax9, label='ISCED = 1')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['wealth'], y = 'Certainty_3', fit_reg=True, ci=None, ax=ax9, label='ISCED = 2')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['wealth'], y = 'Certainty_3', fit_reg=True, ci=None, ax=ax9, label='ISCED = 3')
ax9.set(ylabel='Certainty_3', xlabel='Wealth')
ax9.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [97]:
# Trend line for Absolute Uncertainty Score vs wealth reponses sorted by isced and ORDERED by wealth WITHOUT scatter
fig, ax10 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['wealth'], y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax10, label='ISCED = 1')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['wealth'], y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax10, label='ISCED = 2')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['wealth'], y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax10, label='ISCED = 3')
ax10.set(ylabel='RiskComprehension_4', xlabel='Wealth')
ax10.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [98]:
# Trend line for Absolute Number Comprehension Score vs wealth reponses sorted by isced and ORDERED by wealth WITHOUT scatter
fig, ax11 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['wealth'], y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 1')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['wealth'], y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 2')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['wealth'], y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 3')
ax11.set(ylabel='Numeracy_3', xlabel='Wealth')
ax11.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [99]:
# Trend line for Absolute Graph Comprehension Score vs wealth reponses sorted by isced and ORDERED by wealth WITHOUT scatter
fig, ax11 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['wealth'], y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 1')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['wealth'], y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 2')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['wealth'], y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 3')
ax11.set(ylabel='GraphLiteracy_3', xlabel='Wealth')
ax11.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [100]:
# Trend line for Absolute Bayesian Reasoning Score vs wealth reponses sorted by isced and ORDERED by wealth WITHOUT scatter
fig, ax12 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df6.loc[df6['isced']==1].reset_index(drop = True), x = df6.loc[df6['isced']==1].reset_index(drop = True)['wealth'], y = 'Bayesianreasoning_1', fit_reg=True, ci=None, ax=ax12, label='ISCED = 1')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==2].reset_index(drop = True), x = df6.loc[df6['isced']==2].reset_index(drop = True)['wealth'], y = 'Bayesianreasoning_1', fit_reg=True, ci=None, ax=ax12, label='ISCED = 2')
sns.regplot (scatter = False, data = df6.loc[df6['isced']==3].reset_index(drop = True), x = df6.loc[df6['isced']==3].reset_index(drop = True)['wealth'], y = 'Bayesianreasoning_1', fit_reg=True, ci=None, ax=ax12, label='ISCED = 3')
ax12.set(ylabel='Bayesianreasoning_1', xlabel='Wealth')
ax12.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [101]:
# Descriptive stats for the data set, isced = 1
# NA values of income are removed
df7 = df5
df7.drop(df7[df7['income'] == 7].index, inplace = True)
df7.loc[df7['isced']==1].describe()
Out[101]:
| age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 126.000000 | 126.000000 | 126.0 | 126.000000 | 1.260000e+02 | 126.000000 | 126.000000 | 126.000000 | 126.000000 | 126.000000 | 126.000000 | 126.000000 | 126.000000 | 126.000000 | 126.000000 | 126.000000 | 126.000000 |
| mean | 33.087302 | 2.349206 | 1.0 | 1.888889 | 4.726376e+06 | 1.555556 | 1.777778 | 1.484127 | 1.460317 | 0.285714 | 51.851852 | 44.444444 | 49.470899 | 48.677249 | 28.571429 | 6.563492 | 46.882086 |
| std | 9.354374 | 0.541368 | 0.0 | 1.097067 | 1.304076e+07 | 0.785423 | 1.144649 | 0.952757 | 0.500413 | 0.453557 | 26.180758 | 28.616235 | 31.758569 | 16.680418 | 45.355737 | 1.745834 | 12.470243 |
| min | 18.000000 | 2.000000 | 1.0 | 1.000000 | 0.000000e+00 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 33.333333 | 0.000000 | 3.000000 | 21.428571 |
| 25% | 27.000000 | 2.000000 | 1.0 | 1.000000 | 2.000000e+05 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 33.333333 | 25.000000 | 33.333333 | 33.333333 | 0.000000 | 5.000000 | 35.714286 |
| 50% | 31.000000 | 2.000000 | 1.0 | 2.000000 | 1.000000e+06 | 2.000000 | 2.000000 | 1.000000 | 1.000000 | 0.000000 | 66.666667 | 50.000000 | 33.333333 | 33.333333 | 0.000000 | 7.000000 | 50.000000 |
| 75% | 37.000000 | 3.000000 | 1.0 | 2.000000 | 4.000000e+06 | 2.000000 | 3.000000 | 2.000000 | 2.000000 | 1.000000 | 66.666667 | 75.000000 | 66.666667 | 66.666667 | 100.000000 | 8.000000 | 57.142857 |
| max | 70.000000 | 4.000000 | 1.0 | 6.000000 | 1.000000e+08 | 3.000000 | 4.000000 | 3.000000 | 2.000000 | 1.000000 | 100.000000 | 100.000000 | 100.000000 | 66.666667 | 100.000000 | 11.000000 | 78.571429 |
In [102]:
df7.loc[df7['isced']==2].describe()
Out[102]:
| age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 201.000000 | 201.000000 | 201.0 | 201.000000 | 2.010000e+02 | 201.000000 | 201.000000 | 201.000000 | 201.000000 | 201.000000 | 201.000000 | 201.000000 | 201.000000 | 201.000000 | 201.000000 | 201.000000 | 201.000000 |
| mean | 27.716418 | 2.139303 | 2.0 | 1.706468 | 5.055470e+06 | 1.552239 | 1.850746 | 1.552239 | 1.412935 | 0.318408 | 51.741294 | 46.268657 | 51.741294 | 47.097844 | 31.840796 | 6.686567 | 47.761194 |
| std | 8.642579 | 0.387940 | 0.0 | 1.080929 | 1.647852e+07 | 0.773633 | 1.314387 | 1.033686 | 0.493591 | 0.467022 | 25.787755 | 32.859663 | 34.456212 | 16.453025 | 46.702190 | 2.268539 | 16.203849 |
| min | 18.000000 | 2.000000 | 2.0 | 1.000000 | 0.000000e+00 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 33.333333 | 0.000000 | 1.000000 | 7.142857 |
| 25% | 22.000000 | 2.000000 | 2.0 | 1.000000 | 1.000000e+05 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 33.333333 | 25.000000 | 33.333333 | 33.333333 | 0.000000 | 5.000000 | 35.714286 |
| 50% | 25.000000 | 2.000000 | 2.0 | 1.000000 | 5.000000e+05 | 2.000000 | 2.000000 | 2.000000 | 1.000000 | 0.000000 | 66.666667 | 50.000000 | 66.666667 | 33.333333 | 0.000000 | 6.000000 | 42.857143 |
| 75% | 31.000000 | 2.000000 | 2.0 | 2.000000 | 2.500000e+06 | 2.000000 | 3.000000 | 2.000000 | 2.000000 | 1.000000 | 66.666667 | 75.000000 | 66.666667 | 66.666667 | 100.000000 | 8.000000 | 57.142857 |
| max | 70.000000 | 4.000000 | 2.0 | 6.000000 | 1.500000e+08 | 3.000000 | 4.000000 | 3.000000 | 2.000000 | 1.000000 | 100.000000 | 100.000000 | 100.000000 | 66.666667 | 100.000000 | 12.000000 | 85.714286 |
In [103]:
df7.loc[df7['isced']==3].describe()
Out[103]:
| age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 43.000000 | 43.000000 | 43.0 | 43.000000 | 4.300000e+01 | 43.000000 | 43.000000 | 43.000000 | 43.000000 | 43.000000 | 43.000000 | 43.000000 | 43.000000 | 43.000000 | 43.000000 | 43.000000 | 43.000000 |
| mean | 22.255814 | 2.093023 | 3.0 | 1.837209 | 4.133750e+06 | 1.744186 | 1.651163 | 1.418605 | 1.488372 | 0.325581 | 58.139535 | 41.279070 | 47.286822 | 49.612403 | 32.558140 | 6.627907 | 47.342193 |
| std | 6.959382 | 0.293903 | 0.0 | 1.446346 | 1.532675e+07 | 0.758853 | 1.307191 | 0.879192 | 0.505781 | 0.474137 | 25.295108 | 32.679778 | 29.306407 | 16.859351 | 47.413732 | 1.826045 | 13.043179 |
| min | 16.000000 | 2.000000 | 3.0 | 1.000000 | 0.000000e+00 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 33.333333 | 0.000000 | 2.000000 | 14.285714 |
| 25% | 18.000000 | 2.000000 | 3.0 | 1.000000 | 7.000000e+04 | 2.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 66.666667 | 25.000000 | 33.333333 | 33.333333 | 0.000000 | 6.000000 | 42.857143 |
| 50% | 19.000000 | 2.000000 | 3.0 | 1.000000 | 4.000000e+05 | 2.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 66.666667 | 25.000000 | 33.333333 | 33.333333 | 0.000000 | 6.000000 | 42.857143 |
| 75% | 22.000000 | 2.000000 | 3.0 | 2.000000 | 1.750000e+06 | 2.000000 | 3.000000 | 2.000000 | 2.000000 | 1.000000 | 66.666667 | 75.000000 | 66.666667 | 66.666667 | 100.000000 | 8.000000 | 57.142857 |
| max | 46.000000 | 3.000000 | 3.0 | 6.000000 | 1.000000e+08 | 3.000000 | 4.000000 | 3.000000 | 2.000000 | 1.000000 | 100.000000 | 100.000000 | 100.000000 | 66.666667 | 100.000000 | 11.000000 | 78.571429 |
In [104]:
# Trend line for Absolute Tota Facet Score vs income reponses sorted by isced and ORDERED by income WITH scatter
fig, ax7 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'income', y = df7.loc[df7['isced']==1]['TotalScore_14'], fit_reg=True, ci=None, ax=ax7, label='ISCED = 1')
sns.regplot (data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'income', y = df7.loc[df7['isced']==2]['TotalScore_14'], fit_reg=True, ci=None, ax=ax7, label='ISCED = 2')
sns.regplot (data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'income', y = df7.loc[df7['isced']==3]['TotalScore_14'], fit_reg=True, ci=None, ax=ax7, label='ISCED = 3')
ax7.set(ylabel='Total Scores_19', xlabel='INCOME')
ax7.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [105]:
# Trend line for Absolute Tota Facet Score vs income reponses sorted by isced and ORDERED by income WITHOUT scatter
fig, ax8 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'income', y = 'TotalScore_14', fit_reg=True, ci=None, ax=ax8, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'income', y = 'TotalScore_14', fit_reg=True, ci=None, ax=ax8, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'income', y = 'TotalScore_14', fit_reg=True, ci=None, ax=ax8, label='ISCED = 3')
ax8.set(ylabel='Total Scores_19', xlabel='INCOME')
ax8.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [106]:
# Trend line for Absolute Certainty Score vs income reponses sorted by isced and ORDERED by income WITHOUT scatter
fig, ax9 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'income', y = 'Certainty_3', fit_reg=True, ci=None, ax=ax9, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'income', y = 'Certainty_3', fit_reg=True, ci=None, ax=ax9, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'income', y = 'Certainty_3', fit_reg=True, ci=None, ax=ax9, label='ISCED = 3')
ax9.set(ylabel='Certainty_3', xlabel='INCOME')
ax9.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [107]:
# Trend line for Absolute Risk Comprehension Score vs income reponses sorted by isced and ORDERED by income WITHOUT scatter
fig, ax10 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'income', y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax10, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'income', y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax10, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'income', y = 'RiskComprehension_4', fit_reg=True, ci=None, ax=ax10, label='ISCED = 3')
ax10.set(ylabel='RiskComprehension_4', xlabel='INCOME')
ax10.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [108]:
# Trend line for Absolute Number Comprehension Score vs income reponses sorted by isced and ORDERED by income WITHOUT scatter
fig, ax11 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'income', y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'income', y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'income', y = 'Numeracy_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 3')
ax11.set(ylabel='Numeracy_3', xlabel='INCOME')
ax11.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [109]:
# Trend line for Absolute Graph Comprehension Score vs income reponses sorted by isced and ORDERED by income WITHOUT scatter
fig, ax11 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'income', y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'income', y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'income', y = 'GraphLiteracy_3', fit_reg=True, ci=None, ax=ax11, label='ISCED = 3')
ax11.set(ylabel='GraphLiteracy_3', xlabel='INCOME')
ax11.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [110]:
# Trend line for Absolute Bayesian Reasoning Score vs income reponses sorted by isced and ORDERED by income WITHOUT scatter
fig, ax12 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[df7['isced']==1].reset_index(drop = True), x = 'income', y = 'Bayesianreasoning_1', fit_reg=True, ci=None, ax=ax12, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==2].reset_index(drop = True), x = 'income', y = 'Bayesianreasoning_1', fit_reg=True, ci=None, ax=ax12, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[df7['isced']==3].reset_index(drop = True), x = 'income', y = 'Bayesianreasoning_1', fit_reg=True, ci=None, ax=ax12, label='ISCED = 3')
ax12.set(ylabel='Bayesianreasoning_1', xlabel='INCOME')
ax12.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [111]:
df7
Out[111]:
| ResponseId | age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | R_4XhfVqNhg49zZXe | 37 | 3 | 1 | 2 | 6000000.0 | 2 | 0 | 1 | 2 | 0 | 66.666667 | 0.0 | 33.333333 | 66.666667 | 0.0 | 5 | 35.714286 |
| 1 | R_4FjN0R9SXA0s0G5 | 53 | 3 | 1 | 2 | 5000000.0 | 2 | 2 | 1 | 1 | 1 | 66.666667 | 50.0 | 33.333333 | 33.333333 | 100.0 | 7 | 50.000000 |
| 2 | R_4Ph1UNtYnaoPSX7 | 25 | 2 | 1 | 3 | 400000.0 | 0 | 2 | 0 | 1 | 0 | 0.000000 | 50.0 | 0.000000 | 33.333333 | 0.0 | 3 | 21.428571 |
| 3 | R_4FVQvDpbnyvqOQY | 28 | 2 | 1 | 2 | 3200000.0 | 0 | 1 | 0 | 2 | 0 | 0.000000 | 25.0 | 0.000000 | 66.666667 | 0.0 | 3 | 21.428571 |
| 4 | R_4PjnTbc2vIjogot | 31 | 2 | 1 | 1 | 200000.0 | 2 | 1 | 0 | 1 | 0 | 66.666667 | 25.0 | 0.000000 | 33.333333 | 0.0 | 4 | 28.571429 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 365 | R_4fkCPjfYl6UggHL | 18 | 2 | 3 | 1 | 1000000.0 | 2 | 3 | 1 | 2 | 1 | 66.666667 | 75.0 | 33.333333 | 66.666667 | 100.0 | 9 | 64.285714 |
| 366 | R_4K6WL8dnMy7ODbs | 18 | 2 | 3 | 6 | 100000.0 | 2 | 0 | 1 | 2 | 0 | 66.666667 | 0.0 | 33.333333 | 66.666667 | 0.0 | 5 | 35.714286 |
| 367 | R_4ANgmQQx8TBl3kc | 21 | 2 | 3 | 1 | 16000000.0 | 0 | 4 | 2 | 1 | 1 | 0.000000 | 100.0 | 66.666667 | 33.333333 | 100.0 | 8 | 57.142857 |
| 368 | R_4MJAzsNYYA69Y8p | 27 | 2 | 3 | 1 | 0.0 | 2 | 1 | 1 | 2 | 1 | 66.666667 | 25.0 | 33.333333 | 66.666667 | 100.0 | 7 | 50.000000 |
| 369 | R_4EneslabJXcCOT4 | 18 | 2 | 3 | 1 | 3000000.0 | 2 | 3 | 2 | 1 | 0 | 66.666667 | 75.0 | 66.666667 | 33.333333 | 0.0 | 8 | 57.142857 |
370 rows × 18 columns
In [112]:
# Since we already have a classification for Age groups in the form of age_rec, we will use that.
# We will also use median of age to see if it yields any relevant results, as instructed.
# AXES to be used = Age or age groups, ISCED, Income
# Age groups = [2,3,4]
# Age group 2 = 18 to 35 y/o
# Age group 3 = 36 to 59 y/o
# Age group 4 = 60 to 75 y/o (75 y/o, i.e, within the scope of the data we have, it can mean 60 and above also)
In [113]:
df7.loc[(df7['age_rec']==2)].describe()
Out[113]:
| age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 301.000000 | 301.0 | 301.000000 | 301.000000 | 3.010000e+02 | 301.000000 | 301.000000 | 301.000000 | 301.000000 | 301.000000 | 301.000000 | 301.000000 | 301.000000 | 301.000000 | 301.000000 | 301.000000 | 301.000000 |
| mean | 25.411960 | 2.0 | 1.843854 | 1.750831 | 4.944977e+06 | 1.581395 | 1.807309 | 1.531561 | 1.445183 | 0.312292 | 52.713178 | 45.182724 | 51.052049 | 48.172757 | 31.229236 | 6.677741 | 47.698149 |
| std | 4.902012 | 0.0 | 0.626262 | 1.122961 | 1.647609e+07 | 0.768669 | 1.279093 | 1.001582 | 0.497814 | 0.464200 | 25.622315 | 31.977333 | 33.386079 | 16.593787 | 46.420009 | 2.078253 | 14.844665 |
| min | 16.000000 | 2.0 | 1.000000 | 1.000000 | 0.000000e+00 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 33.333333 | 0.000000 | 1.000000 | 7.142857 |
| 25% | 21.000000 | 2.0 | 1.000000 | 1.000000 | 1.000000e+05 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 33.333333 | 25.000000 | 33.333333 | 33.333333 | 0.000000 | 5.000000 | 35.714286 |
| 50% | 25.000000 | 2.0 | 2.000000 | 1.000000 | 5.000000e+05 | 2.000000 | 2.000000 | 2.000000 | 1.000000 | 0.000000 | 66.666667 | 50.000000 | 66.666667 | 33.333333 | 0.000000 | 6.000000 | 42.857143 |
| 75% | 29.000000 | 2.0 | 2.000000 | 2.000000 | 2.500000e+06 | 2.000000 | 3.000000 | 2.000000 | 2.000000 | 1.000000 | 66.666667 | 75.000000 | 66.666667 | 66.666667 | 100.000000 | 8.000000 | 57.142857 |
| max | 35.000000 | 2.0 | 3.000000 | 6.000000 | 1.500000e+08 | 3.000000 | 4.000000 | 3.000000 | 2.000000 | 1.000000 | 100.000000 | 100.000000 | 100.000000 | 66.666667 | 100.000000 | 12.000000 | 85.714286 |
In [114]:
df7.loc[(df7['age_rec']==3)].describe()
Out[114]:
| age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 62.000000 | 62.0 | 62.000000 | 62.000000 | 6.200000e+01 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 | 62.000000 |
| mean | 41.903226 | 3.0 | 1.483871 | 1.870968 | 4.204371e+06 | 1.564516 | 1.741935 | 1.451613 | 1.370968 | 0.306452 | 52.150538 | 43.548387 | 48.387097 | 45.698925 | 30.645161 | 6.435484 | 45.967742 |
| std | 5.835757 | 0.0 | 0.620469 | 1.108593 | 7.989684e+06 | 0.802066 | 1.172576 | 0.935238 | 0.487007 | 0.464783 | 26.735545 | 29.314394 | 31.174589 | 16.233576 | 46.478303 | 1.980404 | 14.145741 |
| min | 36.000000 | 3.0 | 1.000000 | 1.000000 | 1.000000e+03 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 33.333333 | 0.000000 | 2.000000 | 14.285714 |
| 25% | 37.000000 | 3.0 | 1.000000 | 1.000000 | 1.500000e+05 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 33.333333 | 25.000000 | 33.333333 | 33.333333 | 0.000000 | 5.000000 | 35.714286 |
| 50% | 40.000000 | 3.0 | 1.000000 | 2.000000 | 1.000000e+06 | 2.000000 | 2.000000 | 1.000000 | 1.000000 | 0.000000 | 66.666667 | 50.000000 | 33.333333 | 33.333333 | 0.000000 | 6.500000 | 46.428571 |
| 75% | 44.000000 | 3.0 | 2.000000 | 2.000000 | 5.000000e+06 | 2.000000 | 3.000000 | 2.000000 | 2.000000 | 1.000000 | 66.666667 | 75.000000 | 66.666667 | 66.666667 | 100.000000 | 8.000000 | 57.142857 |
| max | 59.000000 | 3.0 | 3.000000 | 6.000000 | 5.000000e+07 | 3.000000 | 4.000000 | 3.000000 | 2.000000 | 1.000000 | 100.000000 | 100.000000 | 100.000000 | 66.666667 | 100.000000 | 11.000000 | 78.571429 |
In [115]:
df7.loc[(df7['age_rec']==4)].describe()
Out[115]:
| age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 7.000000 | 7.0 | 7.000000 | 7.000000 | 7.000000e+00 | 7.000000 | 7.000000 | 7.000000 | 7.000000 | 7.000000 | 7.000000 | 7.000000 | 7.000000 | 7.000000 | 7.000000 | 7.000000 | 7.000000 |
| mean | 64.285714 | 4.0 | 1.428571 | 2.428571 | 5.759286e+06 | 1.428571 | 2.142857 | 1.285714 | 1.714286 | 0.142857 | 47.619048 | 53.571429 | 42.857143 | 57.142857 | 14.285714 | 6.714286 | 47.959184 |
| std | 4.151879 | 0.0 | 0.534522 | 1.718249 | 4.039660e+06 | 0.975900 | 1.069045 | 0.951190 | 0.487950 | 0.377964 | 32.530002 | 26.726124 | 31.706324 | 16.265001 | 37.796447 | 1.496026 | 10.685903 |
| min | 61.000000 | 4.0 | 1.000000 | 1.000000 | 3.500000e+05 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 33.333333 | 0.000000 | 5.000000 | 35.714286 |
| 25% | 61.000000 | 4.0 | 1.000000 | 1.500000 | 2.750000e+06 | 1.000000 | 2.000000 | 0.500000 | 1.500000 | 0.000000 | 33.333333 | 50.000000 | 16.666667 | 50.000000 | 0.000000 | 5.500000 | 39.285714 |
| 50% | 62.000000 | 4.0 | 1.000000 | 2.000000 | 7.000000e+06 | 2.000000 | 2.000000 | 2.000000 | 2.000000 | 0.000000 | 66.666667 | 50.000000 | 66.666667 | 66.666667 | 0.000000 | 7.000000 | 50.000000 |
| 75% | 67.500000 | 4.0 | 2.000000 | 2.500000 | 8.732500e+06 | 2.000000 | 3.000000 | 2.000000 | 2.000000 | 0.000000 | 66.666667 | 75.000000 | 66.666667 | 66.666667 | 0.000000 | 7.500000 | 53.571429 |
| max | 70.000000 | 4.0 | 2.000000 | 6.000000 | 1.000000e+07 | 2.000000 | 3.000000 | 2.000000 | 2.000000 | 1.000000 | 66.666667 | 75.000000 | 66.666667 | 66.666667 | 100.000000 | 9.000000 | 64.285714 |
In [ ]:
In [116]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Total Scores_14', xlabel='INCOME 18 to 35')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Total Scores_14', xlabel='INCOME 36 to 59')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='Total Scores_14', xlabel='INCOME 60 to 75')
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [117]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ df7['age_rec']==2 ].groupby(['income']).mean(numeric_only=True)['TotalScore_14'], yerr = df7.loc[ (df7['age_rec']==2) ].groupby(['income'])['TotalScore_14'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax1.set(ylabel='Total Scores_14', xlabel='INCOME 18 to 35', yticks = np.arange(5, 16 , 1))
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Total Scores_14', xlabel='INCOME 36 to 59', yticks = np.arange(5, 16 , 1))
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax2.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ df7['age_rec']==3 ].groupby(['income']).mean(numeric_only=True)['TotalScore_14'], yerr = df7.loc[ (df7['age_rec']==3) ].groupby(['income'])['TotalScore_14'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='Total Scores_14', xlabel='INCOME 60 to 75', yticks = np.arange(5, 16 , 1))
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax3.errorbar(x = df7.loc[ df7['age_rec']== 4 ]['income'].unique(), y = df7.loc[ df7['age_rec']==4 ].groupby(['income']).mean(numeric_only=True)['TotalScore_14'], yerr = df7.loc[ (df7['age_rec']==4) ].groupby(['income'])['TotalScore_14'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
In [118]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['Certainty_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['Certainty_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['Certainty_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Certainty_3', xlabel='INCOME 18 to 35', yticks = np.arange(0, 6 , 1))
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax1.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ df7['age_rec']==2 ].groupby(['income'])['Certainty_3'].mean(), yerr = df7.loc[ (df7['age_rec']==2) ].groupby(['income'])['Certainty_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['Certainty_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['Certainty_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['Certainty_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Certainty_3', xlabel='INCOME 36 to 59', yticks = np.arange(0, 6 , 1))
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax2.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ df7['age_rec']==3 ].groupby(['income'])['Certainty_3'].mean(), yerr = df7.loc[ (df7['age_rec']==3) ].groupby(['income'])['Certainty_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['Certainty_3'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['Certainty_3'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['Certainty_3'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='Certainty_3', xlabel='INCOME 60 to 75', yticks = np.arange(0, 6 , 1))
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax3.errorbar(x = df7.loc[ df7['age_rec']== 4 ]['income'].unique(), y = df7.loc[ df7['age_rec']==4 ].groupby(['income'])['Certainty_3'].mean(), yerr = df7.loc[ (df7['age_rec']==4) ].groupby(['income'])['Certainty_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
In [119]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='RiskComprehension_4', xlabel='INCOME 18 to 35', yticks = np.arange(0, 3 , 1))
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax1.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ df7['age_rec']==2 ].groupby(['income'])['RiskComprehension_4'].mean(), yerr = df7.loc[ (df7['age_rec']==2) ].groupby(['income'])['RiskComprehension_4'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='RiskComprehension_4', xlabel='INCOME 36 to 59', yticks = np.arange(0, 3 , 1))
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax2.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ df7['age_rec']==3 ].groupby(['income'])['RiskComprehension_4'].mean(), yerr = df7.loc[ (df7['age_rec']==3) ].groupby(['income'])['RiskComprehension_4'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='RiskComprehension_4', xlabel='INCOME 60 to 75', yticks = np.arange(0, 3 , 1))
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax3.errorbar(x = df7.loc[ df7['age_rec']== 4 ]['income'].unique(), y = df7.loc[ df7['age_rec']==4 ].groupby(['income'])['RiskComprehension_4'].mean(), yerr = df7.loc[ (df7['age_rec']==4) ].groupby(['income'])['RiskComprehension_4'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
In [120]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Numeracy_3', xlabel='INCOME 18 to 35', yticks = np.arange(0, 6 , 1))
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax1.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ df7['age_rec']==2 ].groupby(['income'])['Numeracy_3'].mean(), yerr = df7.loc[ (df7['age_rec']==2) ].groupby(['income'])['Numeracy_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Numeracy_3', xlabel='INCOME 36 to 59', yticks = np.arange(0, 6 , 1))
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax2.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ df7['age_rec']==3 ].groupby(['income'])['Numeracy_3'].mean(), yerr = df7.loc[ (df7['age_rec']==3) ].groupby(['income'])['Numeracy_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='Numeracy_3', xlabel='INCOME 60 to 75', yticks = np.arange(0, 6 , 1))
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax3.errorbar(x = df7.loc[ df7['age_rec']== 4 ]['income'].unique(), y = df7.loc[ df7['age_rec']==4 ].groupby(['income'])['Numeracy_3'].mean(), yerr = df7.loc[ (df7['age_rec']==4) ].groupby(['income'])['Numeracy_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
In [121]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='GraphLiteracy_3', xlabel='INCOME 18 to 35', yticks = np.arange(0, 4 , 1))
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax1.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ df7['age_rec']==2 ].groupby(['income'])['GraphLiteracy_3'].mean(), yerr = df7.loc[ (df7['age_rec']==2) ].groupby(['income'])['GraphLiteracy_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='GraphLiteracy_3', xlabel='INCOME 36 to 59', yticks = np.arange(0, 4 , 1))
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax2.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ df7['age_rec']==3 ].groupby(['income'])['GraphLiteracy_3'].mean(), yerr = df7.loc[ (df7['age_rec']==3) ].groupby(['income'])['GraphLiteracy_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='GraphLiteracy_3', xlabel='INCOME 60 to 75', yticks = np.arange(0, 4 , 1))
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax3.errorbar(x = df7.loc[ df7['age_rec']== 4 ]['income'].unique(), y = df7.loc[ df7['age_rec']==4 ].groupby(['income'])['GraphLiteracy_3'].mean(), yerr = df7.loc[ (df7['age_rec']==4) ].groupby(['income'])['GraphLiteracy_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
In [122]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==1)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==2)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==2) & (df7['isced']==3)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Bayesianreasoning_1', xlabel='INCOME 18 to 35', yticks = np.arange(0, 5 , 1))
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax1.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ df7['age_rec']==2 ].groupby(['income'])['Bayesianreasoning_1'].mean(), yerr = df7.loc[ (df7['age_rec']==2) ].groupby(['income'])['Bayesianreasoning_1'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==1)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==2)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==3) & (df7['isced']==3)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Bayesianreasoning_1', xlabel='INCOME 36 to 59', yticks = np.arange(0, 5 , 1))
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax2.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ df7['age_rec']==3 ].groupby(['income'])['Bayesianreasoning_1'].mean(), yerr = df7.loc[ (df7['age_rec']==3) ].groupby(['income'])['Bayesianreasoning_1'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
fig, ax3 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==1)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==2)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age_rec']==4) & (df7['isced']==3)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax3, label='ISCED = 3')
ax3.set(ylabel='Bayesianreasoning_1', xlabel='INCOME 60 to 75', yticks = np.arange(0, 5 , 1))
ax3.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
ax3.errorbar(x = df7.loc[ df7['age_rec']== 4 ]['income'].unique(), y = df7.loc[ df7['age_rec']==4 ].groupby(['income'])['Bayesianreasoning_1'].mean(), yerr = df7.loc[ (df7['age_rec']==4) ].groupby(['income'])['Bayesianreasoning_1'].sem(), fmt='o', color = lighten_color('gray',0.5))
plt.show()
In [123]:
# Age median method
# Age_median
In [124]:
age_m = df1["age"].median()
age_m
Out[124]:
27.0
In [ ]:
In [125]:
df7["income"].value_counts()
Out[125]:
income 1 199 2 106 3 35 4 15 6 9 5 6 Name: count, dtype: int64
In [ ]:
In [ ]:
In [126]:
df7.loc[(df7['age'] < age_m)].describe()
Out[126]:
| age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 175.000000 | 175.0 | 175.000000 | 175.000000 | 1.750000e+02 | 175.000000 | 175.000000 | 175.000000 | 175.000000 | 175.000000 | 175.000000 | 175.000000 | 175.000000 | 175.000000 | 175.000000 | 175.000000 | 175.000000 |
| mean | 21.868571 | 2.0 | 2.034286 | 1.720000 | 4.902680e+06 | 1.662857 | 1.897143 | 1.497143 | 1.417143 | 0.320000 | 55.428571 | 47.428571 | 49.904762 | 47.238095 | 32.000000 | 6.794286 | 48.530612 |
| std | 2.586411 | 0.0 | 0.614921 | 1.196931 | 1.658430e+07 | 0.731510 | 1.322112 | 0.964152 | 0.494502 | 0.467815 | 24.383663 | 33.052801 | 32.138407 | 16.483393 | 46.781468 | 2.134277 | 15.244838 |
| min | 16.000000 | 2.0 | 1.000000 | 1.000000 | 0.000000e+00 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 33.333333 | 0.000000 | 1.000000 | 7.142857 |
| 25% | 20.000000 | 2.0 | 2.000000 | 1.000000 | 1.000000e+05 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 33.333333 | 25.000000 | 33.333333 | 33.333333 | 0.000000 | 5.000000 | 35.714286 |
| 50% | 22.000000 | 2.0 | 2.000000 | 1.000000 | 5.000000e+05 | 2.000000 | 2.000000 | 2.000000 | 1.000000 | 0.000000 | 66.666667 | 50.000000 | 66.666667 | 33.333333 | 0.000000 | 6.000000 | 42.857143 |
| 75% | 24.000000 | 2.0 | 2.000000 | 2.000000 | 1.800000e+06 | 2.000000 | 3.000000 | 2.000000 | 2.000000 | 1.000000 | 66.666667 | 75.000000 | 66.666667 | 66.666667 | 100.000000 | 8.000000 | 57.142857 |
| max | 26.000000 | 2.0 | 3.000000 | 6.000000 | 1.500000e+08 | 3.000000 | 4.000000 | 3.000000 | 2.000000 | 1.000000 | 100.000000 | 100.000000 | 100.000000 | 66.666667 | 100.000000 | 11.000000 | 78.571429 |
In [127]:
df7.loc[(df7['age'] > age_m)].describe()
Out[127]:
| age | age_rec | isced | income | wealth | Certainty_3 | RiskComprehension_4 | GraphLiteracy_3 | Numeracy_3 | Bayesianreasoning_1 | Certainty_% | RiskComprehension_% | GraphLiteracy_% | Numeracy_% | Bayesianreasoning_% | TotalScore_14 | TotalScore_% | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 177.000000 | 177.000000 | 177.000000 | 177.000000 | 1.770000e+02 | 177.000000 | 177.000000 | 177.000000 | 177.000000 | 177.000000 | 177.000000 | 177.000000 | 177.000000 | 177.000000 | 177.000000 | 177.000000 | 177.000000 |
| mean | 36.067797 | 2.429379 | 1.525424 | 1.847458 | 3.937876e+06 | 1.497175 | 1.711864 | 1.491525 | 1.468927 | 0.293785 | 49.905838 | 42.796610 | 49.717514 | 48.964218 | 29.378531 | 6.463277 | 46.166263 |
| std | 8.701223 | 0.570920 | 0.564759 | 1.078934 | 9.981187e+06 | 0.798609 | 1.197310 | 1.023134 | 0.500449 | 0.456787 | 26.620294 | 29.932754 | 34.104460 | 16.681640 | 45.678697 | 1.994326 | 14.245187 |
| min | 28.000000 | 2.000000 | 1.000000 | 1.000000 | 0.000000e+00 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 33.333333 | 0.000000 | 2.000000 | 14.285714 |
| 25% | 30.000000 | 2.000000 | 1.000000 | 1.000000 | 1.650000e+05 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 0.000000 | 33.333333 | 25.000000 | 33.333333 | 33.333333 | 0.000000 | 5.000000 | 35.714286 |
| 50% | 33.000000 | 2.000000 | 1.000000 | 2.000000 | 6.000000e+05 | 2.000000 | 2.000000 | 1.000000 | 1.000000 | 0.000000 | 66.666667 | 50.000000 | 33.333333 | 33.333333 | 0.000000 | 6.000000 | 42.857143 |
| 75% | 39.000000 | 3.000000 | 2.000000 | 2.000000 | 4.500000e+06 | 2.000000 | 3.000000 | 2.000000 | 2.000000 | 1.000000 | 66.666667 | 75.000000 | 66.666667 | 66.666667 | 100.000000 | 8.000000 | 57.142857 |
| max | 70.000000 | 4.000000 | 3.000000 | 6.000000 | 1.000000e+08 | 3.000000 | 4.000000 | 3.000000 | 2.000000 | 1.000000 | 100.000000 | 100.000000 | 100.000000 | 66.666667 | 100.000000 | 12.000000 | 85.714286 |
In [ ]:
In [128]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.set(ylabel='Total Scores_19', xlabel='INCOME for < median age')
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.set(ylabel='Total Scores_19', xlabel='INCOME for > median age')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [129]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ (df7['age'] < age_m) ].groupby(['income'])['TotalScore_14'].mean(), yerr = df7.loc[ (df7['age'] < age_m) ].groupby(['income'])['TotalScore_14'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax1.set(ylabel='Total Scores_19', xlabel='INCOME for < median age', yticks = np.arange(5, 16 , 1))
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['TotalScore_14'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ (df7['age'] > age_m) ].groupby(['income'])['TotalScore_14'].mean(), yerr = df7.loc[ (df7['age'] > age_m) ].groupby(['income'])['TotalScore_14'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax2.set(ylabel='Total Scores_19', xlabel='INCOME for > median age', yticks = np.arange(5, 16 , 1))
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [130]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['Certainty_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['Certainty_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['Certainty_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ (df7['age'] < age_m) ].groupby(['income'])['Certainty_3'].mean(), yerr = df7.loc[ (df7['age'] < age_m) ].groupby(['income'])['Certainty_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax1.set(ylabel='Certainty_3', xlabel='INCOME for < median age', yticks = np.arange(0, 6 , 1))
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['Certainty_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['Certainty_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['Certainty_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ (df7['age'] > age_m) ].groupby(['income'])['Certainty_3'].mean(), yerr = df7.loc[ (df7['age'] > age_m) ].groupby(['income'])['Certainty_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax2.set(ylabel='Certainty_3', xlabel='INCOME for > median age')
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [131]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ (df7['age'] < age_m) ].groupby(['income'])['RiskComprehension_4'].mean(), yerr = df7.loc[ (df7['age'] < age_m) ].groupby(['income'])['RiskComprehension_4'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax1.set(ylabel='RiskComprehension_4', xlabel='INCOME for < median age', yticks = np.arange(0, 3 , 1))
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['RiskComprehension_4'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ (df7['age'] > age_m) ].groupby(['income'])['RiskComprehension_4'].mean(), yerr = df7.loc[ (df7['age'] > age_m) ].groupby(['income'])['RiskComprehension_4'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax2.set(ylabel='RiskComprehension_4', xlabel='INCOME for > median age', yticks = np.arange(0, 3 , 1))
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [132]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ (df7['age'] < age_m) ].groupby(['income'])['Numeracy_3'].mean(), yerr = df7.loc[ (df7['age'] < age_m) ].groupby(['income'])['Numeracy_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax1.set(ylabel='Numeracy_3', xlabel='INCOME for < median age', yticks = np.arange(0, 6 , 1))
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['Numeracy_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ (df7['age'] > age_m) ].groupby(['income'])['Numeracy_3'].mean(), yerr = df7.loc[ (df7['age'] > age_m) ].groupby(['income'])['Numeracy_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax2.set(ylabel='Numeracy_3', xlabel='INCOME for > median age', yticks = np.arange(0, 6 , 1))
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [133]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ (df7['age'] < age_m) ].groupby(['income'])['GraphLiteracy_3'].mean(), yerr = df7.loc[ (df7['age'] < age_m) ].groupby(['income'])['GraphLiteracy_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax1.set(ylabel='GraphLiteracy_3', xlabel='INCOME for < median age', yticks = np.arange(0, 4 , 1))
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['GraphLiteracy_3'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ (df7['age'] > age_m) ].groupby(['income'])['GraphLiteracy_3'].mean(), yerr = df7.loc[ (df7['age'] > age_m) ].groupby(['income'])['GraphLiteracy_3'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax2.set(ylabel='GraphLiteracy_3', xlabel='INCOME for > median age', yticks = np.arange(0, 4 , 1))
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [134]:
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==1)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==2)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] < age_m) & (df7['isced']==3)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax1, label='ISCED = 3')
ax1.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ (df7['age'] < age_m) ].groupby(['income'])['Bayesianreasoning_1'].mean(), yerr = df7.loc[ (df7['age'] < age_m) ].groupby(['income'])['Bayesianreasoning_1'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax1.set(ylabel='Bayesianreasoning_1', xlabel='INCOME for < median age', yticks = np.arange(0, 5 , 1))
ax1.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
fig, ax2 = plt.subplots(figsize=(6, 6))
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==1)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 1')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==2)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 2')
sns.regplot (scatter = False, data = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)].reset_index(drop = True), x = 'income', y = df7.loc[(df7['age'] > age_m) & (df7['isced']==3)]['Bayesianreasoning_1'], fit_reg=True, ci=None, ax=ax2, label='ISCED = 3')
ax2.errorbar(x = [1,2,3,4,5,6], y = df7.loc[ (df7['age'] > age_m) ].groupby(['income'])['Bayesianreasoning_1'].mean(), yerr = df7.loc[ (df7['age'] > age_m) ].groupby(['income'])['Bayesianreasoning_1'].sem(), fmt='o', color = lighten_color('gray',0.5))
ax2.set(ylabel='Bayesianreasoning_1', xlabel='INCOME for > median age', yticks = np.arange(0, 5 , 1))
ax2.legend(loc='center left',bbox_to_anchor=(1.0, 0.5))
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: